"vscode:/vscode.git/clone" did not exist on "21478411ef564068d447163073a78c01055438fa"
Commit 57f6da5c authored by bailuo

readme

from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
from .inference import (async_inference_detector, inference_detector,
init_detector, show_result, show_result_pyplot, show_result_ins)
from .train import get_root_logger, set_random_seed, train_detector
__all__ = [
'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
'async_inference_detector', 'inference_detector', 'show_result',
'show_result_pyplot', 'show_result_ins'
]
import warnings
import matplotlib.pyplot as plt
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
import torch
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint
from mmdet.core import get_classes
from mmdet.datasets.pipelines import Compose
from mmdet.models import build_detector
import cv2
from scipy import ndimage
def init_detector(config, checkpoint=None, device='cuda:0'):
"""Initialize a detector from config file.
Args:
config (str or :obj:`mmcv.Config`): Config file path or the config
object.
        checkpoint (str, optional): Checkpoint path. If left as None, the model
            will not load any weights.
        device (str): Device to place the model on, e.g. 'cuda:0'.
Returns:
nn.Module: The constructed detector.
"""
if isinstance(config, str):
config = mmcv.Config.fromfile(config)
elif not isinstance(config, mmcv.Config):
raise TypeError('config must be a filename or Config object, '
'but got {}'.format(type(config)))
config.model.pretrained = None
model = build_detector(config.model, test_cfg=config.test_cfg)
if checkpoint is not None:
checkpoint = load_checkpoint(model, checkpoint)
        if 'meta' in checkpoint and 'CLASSES' in checkpoint['meta']:
model.CLASSES = checkpoint['meta']['CLASSES']
else:
warnings.warn('Class names are not saved in the checkpoint\'s '
'meta data, use COCO classes by default.')
model.CLASSES = get_classes('coco')
model.cfg = config # save the config in the model for convenience
model.to(device)
model.eval()
return model
class LoadImage(object):
    """Simple test-time pipeline stage that loads an image from a path or
    passes a pre-loaded array through."""

    def __call__(self, results):
if isinstance(results['img'], str):
results['filename'] = results['img']
else:
results['filename'] = None
img = mmcv.imread(results['img'])
results['img'] = img
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
return results
def inference_detector(model, img):
"""Inference image(s) with the detector.
Args:
model (nn.Module): The loaded detector.
imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
images.
Returns:
If imgs is a str, a generator will be returned, otherwise return the
detection results directly.
"""
cfg = model.cfg
device = next(model.parameters()).device # model device
# build the data pipeline
test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
test_pipeline = Compose(test_pipeline)
# prepare data
data = dict(img=img)
data = test_pipeline(data)
data = scatter(collate([data], samples_per_gpu=1), [device])[0]
# forward the model
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
return result
async def async_inference_detector(model, img):
"""Async inference image(s) with the detector.
Args:
model (nn.Module): The loaded detector.
imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
images.
Returns:
Awaitable detection results.
"""
cfg = model.cfg
device = next(model.parameters()).device # model device
# build the data pipeline
test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
test_pipeline = Compose(test_pipeline)
# prepare data
data = dict(img=img)
data = test_pipeline(data)
data = scatter(collate([data], samples_per_gpu=1), [device])[0]
# We don't restore `torch.is_grad_enabled()` value during concurrent
# inference since execution can overlap
torch.set_grad_enabled(False)
result = await model.aforward_test(rescale=True, **data)
return result
# TODO: merge this method with the one in BaseDetector
def show_result(img,
result,
class_names,
score_thr=0.3,
wait_time=0,
show=True,
out_file=None):
"""Visualize the detection results on the image.
Args:
img (str or np.ndarray): Image filename or loaded image.
result (tuple[list] or list): The detection result, can be either
(bbox, segm) or just bbox.
class_names (list[str] or tuple[str]): A list of class names.
score_thr (float): The threshold to visualize the bboxes and masks.
wait_time (int): Value of waitKey param.
show (bool, optional): Whether to show the image with opencv or not.
out_file (str, optional): If specified, the visualization result will
be written to the out file instead of shown in a window.
Returns:
np.ndarray or None: If neither `show` nor `out_file` is specified, the
visualized image is returned, otherwise None is returned.
"""
assert isinstance(class_names, (tuple, list))
img = mmcv.imread(img)
img = img.copy()
if isinstance(result, tuple):
bbox_result, segm_result = result
else:
bbox_result, segm_result = result, None
bboxes = np.vstack(bbox_result)
labels = [
np.full(bbox.shape[0], i, dtype=np.int32)
for i, bbox in enumerate(bbox_result)
]
labels = np.concatenate(labels)
# draw segmentation masks
if segm_result is not None:
segms = mmcv.concat_list(segm_result)
inds = np.where(bboxes[:, -1] > score_thr)[0]
np.random.seed(42)
color_masks = [
np.random.randint(0, 256, (1, 3), dtype=np.uint8)
for _ in range(max(labels) + 1)
]
for i in inds:
i = int(i)
color_mask = color_masks[labels[i]]
            mask = maskUtils.decode(segms[i]).astype(bool)  # np.bool was removed in NumPy 1.24
img[mask] = img[mask] * 0.5 + color_mask * 0.5
# draw bounding boxes
mmcv.imshow_det_bboxes(
img,
bboxes,
labels,
class_names=class_names,
score_thr=score_thr,
show=show,
wait_time=wait_time,
out_file=out_file)
if not (show or out_file):
return img
def show_result_pyplot(img,
result,
class_names,
score_thr=0.3,
fig_size=(15, 10)):
"""Visualize the detection results on the image.
Args:
img (str or np.ndarray): Image filename or loaded image.
result (tuple[list] or list): The detection result, can be either
(bbox, segm) or just bbox.
class_names (list[str] or tuple[str]): A list of class names.
score_thr (float): The threshold to visualize the bboxes and masks.
fig_size (tuple): Figure size of the pyplot figure.
"""
img = show_result(
img, result, class_names, score_thr=score_thr, show=False)
plt.figure(figsize=fig_size)
    plt.imshow(mmcv.bgr2rgb(img))
    plt.show()  # needed outside notebook environments for the figure to appear
def show_result_ins(img,
result,
class_names,
score_thr=0.3,
sort_by_density=False,
out_file=None):
"""Visualize the instance segmentation results on the image.
Args:
img (str or np.ndarray): Image filename or loaded image.
result (tuple[list] or list): The instance segmentation result.
class_names (list[str] or tuple[str]): A list of class names.
score_thr (float): The threshold to visualize the masks.
        sort_by_density (bool): Whether to sort the masks by area so that
            larger masks are drawn first and smaller ones remain visible.
out_file (str, optional): If specified, the visualization result will
be written to the out file instead of shown in a window.
    Returns:
        np.ndarray or None: If `out_file` is not specified, the visualized
            image is returned, otherwise None is returned.
"""
assert isinstance(class_names, (tuple, list))
img = mmcv.imread(img)
img_show = img.copy()
h, w, _ = img.shape
if not result or result == [None]:
return img_show
cur_result = result[0]
seg_label = cur_result[0]
seg_label = seg_label.cpu().numpy().astype(np.uint8)
cate_label = cur_result[1]
cate_label = cate_label.cpu().numpy()
score = cur_result[2].cpu().numpy()
vis_inds = score > score_thr
seg_label = seg_label[vis_inds]
num_mask = seg_label.shape[0]
cate_label = cate_label[vis_inds]
cate_score = score[vis_inds]
if sort_by_density:
mask_density = []
for idx in range(num_mask):
cur_mask = seg_label[idx, :, :]
cur_mask = mmcv.imresize(cur_mask, (w, h))
cur_mask = (cur_mask > 0.5).astype(np.int32)
mask_density.append(cur_mask.sum())
orders = np.argsort(mask_density)
seg_label = seg_label[orders]
cate_label = cate_label[orders]
cate_score = cate_score[orders]
np.random.seed(42)
color_masks = [
np.random.randint(0, 256, (1, 3), dtype=np.uint8)
for _ in range(num_mask)
]
for idx in range(num_mask):
idx = -(idx+1)
cur_mask = seg_label[idx, :, :]
cur_mask = mmcv.imresize(cur_mask, (w, h))
cur_mask = (cur_mask > 0.5).astype(np.uint8)
if cur_mask.sum() == 0:
continue
color_mask = color_masks[idx]
        cur_mask_bool = cur_mask.astype(bool)  # np.bool was removed in NumPy 1.24
img_show[cur_mask_bool] = img[cur_mask_bool] * 0.5 + color_mask * 0.5
cur_cate = cate_label[idx]
cur_score = cate_score[idx]
label_text = class_names[cur_cate]
#label_text += '|{:.02f}'.format(cur_score)
        center_y, center_x = ndimage.center_of_mass(cur_mask)  # the `measurements` namespace is deprecated in SciPy
vis_pos = (max(int(center_x) - 10, 0), int(center_y))
        cv2.putText(img_show, label_text, vis_pos,
                    cv2.FONT_HERSHEY_COMPLEX, 0.3, (255, 255, 255))  # white text
if out_file is None:
return img_show
else:
mmcv.imwrite(img_show, out_file)
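# Illustrative usage sketch (not part of the original module; the config,
# checkpoint and image paths are hypothetical placeholders): how the APIs
# above compose for single-image instance segmentation.
def _demo_inference(config_file, checkpoint_file, image_file):
    model = init_detector(config_file, checkpoint_file, device='cuda:0')
    result = inference_detector(model, image_file)
    # out_file is None, so the visualized image is returned
    return show_result_ins(image_file, result, model.CLASSES, score_thr=0.25)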
import random
import re
from collections import OrderedDict
import numpy as np
import torch
import torch.distributed as dist
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner, obj_from_dict
from mmdet import datasets
from mmdet.core import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
DistEvalmAPHook, DistOptimizerHook, Fp16OptimizerHook)
from mmdet.datasets import DATASETS, build_dataloader
from mmdet.models import RPN
from mmdet.utils import get_root_logger
def set_random_seed(seed, deterministic=False):
"""Set random seed.
Args:
seed (int): Seed to be used.
deterministic (bool): Whether to set the deterministic option for
CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
to True and `torch.backends.cudnn.benchmark` to False.
Default: False.
"""
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if deterministic:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
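# Minimal usage sketch (illustrative): seed everything once before building
# the model and dataloaders; deterministic=True trades speed for
# reproducible CUDNN kernels.
# >>> set_random_seed(0, deterministic=True)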
def parse_losses(losses):
log_vars = OrderedDict()
for loss_name, loss_value in losses.items():
if isinstance(loss_value, torch.Tensor):
log_vars[loss_name] = loss_value.mean()
elif isinstance(loss_value, list):
log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
else:
raise TypeError(
'{} is not a tensor or list of tensors'.format(loss_name))
loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
log_vars['loss'] = loss
for loss_name, loss_value in log_vars.items():
# reduce loss when distributed training
if dist.is_available() and dist.is_initialized():
loss_value = loss_value.data.clone()
dist.all_reduce(loss_value.div_(dist.get_world_size()))
log_vars[loss_name] = loss_value.item()
return loss, log_vars
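# Illustrative sketch (hypothetical values): how parse_losses reduces a loss
# dict. Keys containing 'loss' are summed into the total, and every entry is
# reduced to a python float in log_vars.
def _demo_parse_losses():
    losses = dict(
        loss_cls=torch.tensor([0.9, 1.1]),  # tensor -> mean -> 1.0
        loss_bbox=[torch.tensor(0.2), torch.tensor(0.4)],  # list -> sum of means -> 0.6
        acc=torch.tensor(0.75))  # logged, but excluded from the total loss
    loss, log_vars = parse_losses(losses)
    # loss is about 1.6 (loss_cls + loss_bbox); log_vars['acc'] == 0.75
    return loss, log_vars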
def batch_processor(model, data, train_mode):
"""Process a data batch.
This method is required as an argument of Runner, which defines how to
process a data batch and obtain proper outputs. The first 3 arguments of
batch_processor are fixed.
Args:
model (nn.Module): A PyTorch model.
data (dict): The data batch in a dict.
train_mode (bool): Training mode or not. It may be useless for some
models.
Returns:
dict: A dict containing losses and log vars.
"""
losses = model(**data)
loss, log_vars = parse_losses(losses)
outputs = dict(
loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
return outputs
def train_detector(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None):
logger = get_root_logger(cfg.log_level)
# start training
if distributed:
_dist_train(
model,
dataset,
cfg,
validate=validate,
logger=logger,
timestamp=timestamp)
else:
_non_dist_train(
model,
dataset,
cfg,
validate=validate,
logger=logger,
timestamp=timestamp)
def build_optimizer(model, optimizer_cfg):
"""Build optimizer from configs.
Args:
model (:obj:`nn.Module`): The model with parameters to be optimized.
optimizer_cfg (dict): The config dict of the optimizer.
Positional fields are:
- type: class name of the optimizer.
- lr: base learning rate.
Optional fields are:
- any arguments of the corresponding optimizer type, e.g.,
weight_decay, momentum, etc.
                - paramwise_options: a dict with 3 accepted fields
                    (bias_lr_mult, bias_decay_mult, norm_decay_mult).
`bias_lr_mult` and `bias_decay_mult` will be multiplied to
the lr and weight decay respectively for all bias parameters
(except for the normalization layers), and
`norm_decay_mult` will be multiplied to the weight decay
for all weight and bias parameters of normalization layers.
Returns:
torch.optim.Optimizer: The initialized optimizer.
Example:
>>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        ...                      weight_decay=0.0001)
>>> optimizer = build_optimizer(model, optimizer_cfg)
"""
if hasattr(model, 'module'):
model = model.module
optimizer_cfg = optimizer_cfg.copy()
paramwise_options = optimizer_cfg.pop('paramwise_options', None)
# if no paramwise option is specified, just use the global setting
if paramwise_options is None:
return obj_from_dict(optimizer_cfg, torch.optim,
dict(params=model.parameters()))
else:
assert isinstance(paramwise_options, dict)
# get base lr and weight decay
base_lr = optimizer_cfg['lr']
base_wd = optimizer_cfg.get('weight_decay', None)
# weight_decay must be explicitly specified if mult is specified
if ('bias_decay_mult' in paramwise_options
or 'norm_decay_mult' in paramwise_options):
assert base_wd is not None
# get param-wise options
bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.)
bias_decay_mult = paramwise_options.get('bias_decay_mult', 1.)
norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.)
# set param-wise lr and weight decay
params = []
for name, param in model.named_parameters():
param_group = {'params': [param]}
if not param.requires_grad:
# FP16 training needs to copy gradient/weight between master
# weight copy and model weight, it is convenient to keep all
# parameters here to align with model.parameters()
params.append(param_group)
continue
# for norm layers, overwrite the weight decay of weight and bias
# TODO: obtain the norm layer prefixes dynamically
            if re.search(r'(bn|gn)(\d+)?\.(weight|bias)', name):  # escape the dot so it only matches a literal '.'
if base_wd is not None:
param_group['weight_decay'] = base_wd * norm_decay_mult
# for other layers, overwrite both lr and weight decay of bias
elif name.endswith('.bias'):
param_group['lr'] = base_lr * bias_lr_mult
if base_wd is not None:
param_group['weight_decay'] = base_wd * bias_decay_mult
# otherwise use the global settings
params.append(param_group)
optimizer_cls = getattr(torch.optim, optimizer_cfg.pop('type'))
return optimizer_cls(params, **optimizer_cfg)
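# Illustrative sketch (hypothetical values): a config exercising the
# paramwise_options documented above. Biases get 2x the base lr and no
# weight decay; norm-layer weights and biases get no weight decay either.
def _demo_build_optimizer(model):
    optimizer_cfg = dict(
        type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001,
        paramwise_options=dict(
            bias_lr_mult=2., bias_decay_mult=0., norm_decay_mult=0.))
    return build_optimizer(model, optimizer_cfg)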
def _dist_train(model,
dataset,
cfg,
validate=False,
logger=None,
timestamp=None):
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
data_loaders = [
build_dataloader(
ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
for ds in dataset
]
# put model on gpus
model = MMDistributedDataParallel(model.cuda())
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
runner = Runner(
model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly workaround to make the .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
**fp16_cfg)
else:
optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
# register hooks
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config)
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if validate:
val_dataset_cfg = cfg.data.val
eval_cfg = cfg.get('evaluation', {})
if isinstance(model.module, RPN):
# TODO: implement recall hooks for other datasets
runner.register_hook(
CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
else:
dataset_type = DATASETS.get(val_dataset_cfg.type)
if issubclass(dataset_type, datasets.CocoDataset):
runner.register_hook(
CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
else:
runner.register_hook(
DistEvalmAPHook(val_dataset_cfg, **eval_cfg))
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_train(model,
dataset,
cfg,
validate=False,
logger=None,
timestamp=None):
if validate:
        raise NotImplementedError('Built-in validation is not implemented '
                                  'yet in non-distributed training. Use '
                                  'distributed training or the test.py and '
                                  '*eval.py scripts instead.')
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
data_loaders = [
build_dataloader(
ds,
cfg.data.imgs_per_gpu,
cfg.data.workers_per_gpu,
cfg.gpus,
dist=False) for ds in dataset
]
# put model on gpus
model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
runner = Runner(
model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly workaround to make the .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(
**cfg.optimizer_config, **fp16_cfg, distributed=False)
else:
optimizer_config = cfg.optimizer_config
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config)
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
from .anchor import * # noqa: F401, F403
from .bbox import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .fp16 import * # noqa: F401, F403
from .mask import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .anchor_generator import AnchorGenerator
from .anchor_target import (anchor_inside_flags, anchor_target,
images_to_levels, unmap)
from .guided_anchor_target import ga_loc_target, ga_shape_target
from .point_generator import PointGenerator
from .point_target import point_target
__all__ = [
'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target',
'ga_shape_target', 'PointGenerator', 'point_target', 'images_to_levels',
'unmap'
]
import torch
class AnchorGenerator(object):
"""
Examples:
>>> from mmdet.core import AnchorGenerator
>>> self = AnchorGenerator(9, [1.], [1.])
>>> all_anchors = self.grid_anchors((2, 2), device='cpu')
>>> print(all_anchors)
tensor([[ 0., 0., 8., 8.],
[16., 0., 24., 8.],
[ 0., 16., 8., 24.],
[16., 16., 24., 24.]])
"""
def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
self.base_size = base_size
self.scales = torch.Tensor(scales)
self.ratios = torch.Tensor(ratios)
self.scale_major = scale_major
self.ctr = ctr
self.base_anchors = self.gen_base_anchors()
@property
def num_base_anchors(self):
return self.base_anchors.size(0)
def gen_base_anchors(self):
w = self.base_size
h = self.base_size
if self.ctr is None:
x_ctr = 0.5 * (w - 1)
y_ctr = 0.5 * (h - 1)
else:
x_ctr, y_ctr = self.ctr
h_ratios = torch.sqrt(self.ratios)
w_ratios = 1 / h_ratios
if self.scale_major:
ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
else:
ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
# yapf: disable
base_anchors = torch.stack(
[
x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
],
dim=-1).round()
# yapf: enable
return base_anchors
def _meshgrid(self, x, y, row_major=True):
xx = x.repeat(len(y))
yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
if row_major:
return xx, yy
else:
return yy, xx
def grid_anchors(self, featmap_size, stride=16, device='cuda'):
base_anchors = self.base_anchors.to(device)
feat_h, feat_w = featmap_size
shift_x = torch.arange(0, feat_w, device=device) * stride
shift_y = torch.arange(0, feat_h, device=device) * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
shifts = shifts.type_as(base_anchors)
# first feat_w elements correspond to the first row of shifts
# add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
# shifted anchors (K, A, 4), reshape to (K*A, 4)
all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
all_anchors = all_anchors.view(-1, 4)
# first A rows correspond to A anchors of (0, 0) in feature map,
# then (0, 1), (0, 2), ...
return all_anchors
def valid_flags(self, featmap_size, valid_size, device='cuda'):
feat_h, feat_w = featmap_size
valid_h, valid_w = valid_size
assert valid_h <= feat_h and valid_w <= feat_w
valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
valid_x[:valid_w] = 1
valid_y[:valid_h] = 1
valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
valid = valid_xx & valid_yy
valid = valid[:,
None].expand(valid.size(0),
self.num_base_anchors).contiguous().view(-1)
return valid
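# Illustrative sketch (hypothetical sizes and strides): multi-ratio anchors
# on a small feature map, plus the valid flags for a padded input.
def _demo_anchor_generator():
    gen = AnchorGenerator(base_size=8, scales=[1.], ratios=[0.5, 1.0, 2.0])
    anchors = gen.grid_anchors((4, 4), stride=8, device='cpu')  # (4*4*3, 4)
    # only the top 3 rows of the 4x4 map correspond to unpadded input
    flags = gen.valid_flags((4, 4), valid_size=(3, 4), device='cpu')
    return anchors, flags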
import torch
from ..bbox import PseudoSampler, assign_and_sample, bbox2delta, build_assigner
from ..utils import multi_apply
def anchor_target(anchor_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
target_means,
target_stds,
cfg,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
sampling=True,
unmap_outputs=True):
"""Compute regression and classification targets for anchors.
Args:
anchor_list (list[list]): Multi level anchors of each image.
valid_flag_list (list[list]): Multi level valid flags of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
target_means (Iterable): Mean value of regression targets.
target_stds (Iterable): Std value of regression targets.
cfg (dict): RPN train configs.
Returns:
tuple
"""
num_imgs = len(img_metas)
assert len(anchor_list) == len(valid_flag_list) == num_imgs
# anchor number of multi levels
num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
# concat all level anchors and flags to a single tensor
for i in range(num_imgs):
assert len(anchor_list[i]) == len(valid_flag_list[i])
anchor_list[i] = torch.cat(anchor_list[i])
valid_flag_list[i] = torch.cat(valid_flag_list[i])
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
if gt_labels_list is None:
gt_labels_list = [None for _ in range(num_imgs)]
(all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
pos_inds_list, neg_inds_list) = multi_apply(
anchor_target_single,
anchor_list,
valid_flag_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
gt_labels_list,
img_metas,
target_means=target_means,
target_stds=target_stds,
cfg=cfg,
label_channels=label_channels,
sampling=sampling,
unmap_outputs=unmap_outputs)
# no valid anchors
if any([labels is None for labels in all_labels]):
return None
# sampled anchors of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
# split targets to a list w.r.t. multiple levels
labels_list = images_to_levels(all_labels, num_level_anchors)
label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
return (labels_list, label_weights_list, bbox_targets_list,
bbox_weights_list, num_total_pos, num_total_neg)
def images_to_levels(target, num_level_anchors):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
for n in num_level_anchors:
end = start + n
level_targets.append(target[:, start:end].squeeze(0))
start = end
return level_targets
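# Illustrative sketch: two images with 5 anchors each (3 on level 0, 2 on
# level 1) are regrouped into one tensor per level.
# >>> flat = [torch.arange(5), torch.arange(5, 10)]
# >>> images_to_levels(flat, [3, 2])
# [tensor([[0, 1, 2], [5, 6, 7]]), tensor([[3, 4], [8, 9]])]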
def anchor_target_single(flat_anchors,
valid_flags,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
img_meta,
target_means,
target_stds,
cfg,
label_channels=1,
sampling=True,
unmap_outputs=True):
inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
img_meta['img_shape'][:2],
cfg.allowed_border)
if not inside_flags.any():
return (None, ) * 6
# assign gt and sample anchors
anchors = flat_anchors[inside_flags, :]
if sampling:
assign_result, sampling_result = assign_and_sample(
anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
else:
bbox_assigner = build_assigner(cfg.assigner)
assign_result = bbox_assigner.assign(anchors, gt_bboxes,
gt_bboxes_ignore, gt_labels)
bbox_sampler = PseudoSampler()
sampling_result = bbox_sampler.sample(assign_result, anchors,
gt_bboxes)
num_valid_anchors = anchors.shape[0]
bbox_targets = torch.zeros_like(anchors)
bbox_weights = torch.zeros_like(anchors)
labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
if len(pos_inds) > 0:
pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
sampling_result.pos_gt_bboxes,
target_means, target_stds)
bbox_targets[pos_inds, :] = pos_bbox_targets
bbox_weights[pos_inds, :] = 1.0
if gt_labels is None:
labels[pos_inds] = 1
else:
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
if cfg.pos_weight <= 0:
label_weights[pos_inds] = 1.0
else:
label_weights[pos_inds] = cfg.pos_weight
if len(neg_inds) > 0:
label_weights[neg_inds] = 1.0
# map up to original set of anchors
if unmap_outputs:
num_total_anchors = flat_anchors.size(0)
labels = unmap(labels, num_total_anchors, inside_flags)
label_weights = unmap(label_weights, num_total_anchors, inside_flags)
bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
neg_inds)
def anchor_inside_flags(flat_anchors,
valid_flags,
img_shape,
allowed_border=0):
img_h, img_w = img_shape[:2]
if allowed_border >= 0:
inside_flags = valid_flags & \
(flat_anchors[:, 0] >= -allowed_border).type(torch.uint8) & \
(flat_anchors[:, 1] >= -allowed_border).type(torch.uint8) & \
(flat_anchors[:, 2] < img_w + allowed_border).type(torch.uint8) & \
(flat_anchors[:, 3] < img_h + allowed_border).type(torch.uint8)
else:
inside_flags = valid_flags
return inside_flags
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
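# Illustrative sketch: unmap scatters values computed for the inside anchors
# back to the full anchor set, filling the remaining slots with 0.
# >>> inside = torch.tensor([1, 0, 0, 1, 0], dtype=torch.bool)
# >>> unmap(torch.tensor([7, 9]), 5, inside)
# tensor([7, 0, 0, 9, 0])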
import torch
from ..bbox import PseudoSampler, build_assigner, build_sampler
from ..utils import multi_apply, unmap
def calc_region(bbox, ratio, featmap_size=None):
"""Calculate a proportional bbox region.
    The bbox center is fixed, while the new h' and w' are h * ratio and w * ratio.
Args:
bbox (Tensor): Bboxes to calculate regions, shape (n, 4)
ratio (float): Ratio of the output region.
featmap_size (tuple): Feature map size used for clipping the boundary.
Returns:
tuple: x1, y1, x2, y2
"""
x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long()
y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long()
x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long()
y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long()
if featmap_size is not None:
x1 = x1.clamp(min=0, max=featmap_size[1] - 1)
y1 = y1.clamp(min=0, max=featmap_size[0] - 1)
x2 = x2.clamp(min=0, max=featmap_size[1] - 1)
y2 = y2.clamp(min=0, max=featmap_size[0] - 1)
return (x1, y1, x2, y2)
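# Worked example: with ratio r the two corners are interpolated toward each
# other, so r=0.5 collapses a box to its center point. For bbox
# (x1, y1, x2, y2) = (0, 0, 10, 10) and ratio 0.2:
#   x1' = round(0.8 * 0 + 0.2 * 10) = 2,  x2' = round(0.2 * 0 + 0.8 * 10) = 8
# giving the center region (2, 2, 8, 8).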
def ga_loc_target(gt_bboxes_list,
featmap_sizes,
anchor_scale,
anchor_strides,
center_ratio=0.2,
ignore_ratio=0.5):
"""Compute location targets for guided anchoring.
Each feature map is divided into positive, negative and ignore regions.
- positive regions: target 1, weight 1
- ignore regions: target 0, weight 0
- negative regions: target 0, weight 0.1
Args:
gt_bboxes_list (list[Tensor]): Gt bboxes of each image.
featmap_sizes (list[tuple]): Multi level sizes of each feature maps.
anchor_scale (int): Anchor scale.
anchor_strides ([list[int]]): Multi level anchor strides.
center_ratio (float): Ratio of center region.
ignore_ratio (float): Ratio of ignore region.
Returns:
tuple
"""
img_per_gpu = len(gt_bboxes_list)
num_lvls = len(featmap_sizes)
r1 = (1 - center_ratio) / 2
r2 = (1 - ignore_ratio) / 2
all_loc_targets = []
all_loc_weights = []
all_ignore_map = []
for lvl_id in range(num_lvls):
h, w = featmap_sizes[lvl_id]
loc_targets = torch.zeros(
img_per_gpu,
1,
h,
w,
device=gt_bboxes_list[0].device,
dtype=torch.float32)
loc_weights = torch.full_like(loc_targets, -1)
ignore_map = torch.zeros_like(loc_targets)
all_loc_targets.append(loc_targets)
all_loc_weights.append(loc_weights)
all_ignore_map.append(ignore_map)
for img_id in range(img_per_gpu):
gt_bboxes = gt_bboxes_list[img_id]
scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) *
(gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1))
min_anchor_size = scale.new_full(
(1, ), float(anchor_scale * anchor_strides[0]))
# assign gt bboxes to different feature levels w.r.t. their scales
target_lvls = torch.floor(
torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)
target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()
for gt_id in range(gt_bboxes.size(0)):
lvl = target_lvls[gt_id].item()
# rescaled to corresponding feature map
gt_ = gt_bboxes[gt_id, :4] / anchor_strides[lvl]
# calculate ignore regions
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
gt_, r2, featmap_sizes[lvl])
# calculate positive (center) regions
ctr_x1, ctr_y1, ctr_x2, ctr_y2 = calc_region(
gt_, r1, featmap_sizes[lvl])
all_loc_targets[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,
ctr_x1:ctr_x2 + 1] = 1
all_loc_weights[lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
ignore_x1:ignore_x2 + 1] = 0
all_loc_weights[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,
ctr_x1:ctr_x2 + 1] = 1
# calculate ignore map on nearby low level feature
if lvl > 0:
d_lvl = lvl - 1
# rescaled to corresponding feature map
gt_ = gt_bboxes[gt_id, :4] / anchor_strides[d_lvl]
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
gt_, r2, featmap_sizes[d_lvl])
all_ignore_map[d_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
ignore_x1:ignore_x2 + 1] = 1
# calculate ignore map on nearby high level feature
if lvl < num_lvls - 1:
u_lvl = lvl + 1
# rescaled to corresponding feature map
gt_ = gt_bboxes[gt_id, :4] / anchor_strides[u_lvl]
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
gt_, r2, featmap_sizes[u_lvl])
all_ignore_map[u_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
ignore_x1:ignore_x2 + 1] = 1
for lvl_id in range(num_lvls):
# ignore negative regions w.r.t. ignore map
all_loc_weights[lvl_id][(all_loc_weights[lvl_id] < 0)
& (all_ignore_map[lvl_id] > 0)] = 0
# set negative regions with weight 0.1
all_loc_weights[lvl_id][all_loc_weights[lvl_id] < 0] = 0.1
# loc average factor to balance loss
loc_avg_factor = sum(
[t.size(0) * t.size(-1) * t.size(-2) for t in all_loc_targets]) / 200
return all_loc_targets, all_loc_weights, loc_avg_factor
def ga_shape_target(approx_list,
inside_flag_list,
square_list,
gt_bboxes_list,
img_metas,
approxs_per_octave,
cfg,
gt_bboxes_ignore_list=None,
sampling=True,
unmap_outputs=True):
"""Compute guided anchoring targets.
Args:
approx_list (list[list]): Multi level approxs of each image.
inside_flag_list (list[list]): Multi level inside flags of each image.
square_list (list[list]): Multi level squares of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
approxs_per_octave (int): number of approxs per octave
cfg (dict): RPN train configs.
gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.
sampling (bool): sampling or not.
unmap_outputs (bool): unmap outputs or not.
Returns:
tuple
"""
num_imgs = len(img_metas)
assert len(approx_list) == len(inside_flag_list) == len(
square_list) == num_imgs
# anchor number of multi levels
num_level_squares = [squares.size(0) for squares in square_list[0]]
# concat all level anchors and flags to a single tensor
inside_flag_flat_list = []
approx_flat_list = []
square_flat_list = []
for i in range(num_imgs):
assert len(square_list[i]) == len(inside_flag_list[i])
inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
approx_flat_list.append(torch.cat(approx_list[i]))
square_flat_list.append(torch.cat(square_list[i]))
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
(all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list,
neg_inds_list) = multi_apply(
ga_shape_target_single,
approx_flat_list,
inside_flag_flat_list,
square_flat_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
img_metas,
approxs_per_octave=approxs_per_octave,
cfg=cfg,
sampling=sampling,
unmap_outputs=unmap_outputs)
# no valid anchors
if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]):
return None
# sampled anchors of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
# split targets to a list w.r.t. multiple levels
bbox_anchors_list = images_to_levels(all_bbox_anchors, num_level_squares)
bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares)
bbox_weights_list = images_to_levels(all_bbox_weights, num_level_squares)
return (bbox_anchors_list, bbox_gts_list, bbox_weights_list, num_total_pos,
num_total_neg)
def images_to_levels(target, num_level_anchors):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
for n in num_level_anchors:
end = start + n
level_targets.append(target[:, start:end].squeeze(0))
start = end
return level_targets
def ga_shape_target_single(flat_approxs,
inside_flags,
flat_squares,
gt_bboxes,
gt_bboxes_ignore,
img_meta,
approxs_per_octave,
cfg,
sampling=True,
unmap_outputs=True):
"""Compute guided anchoring targets.
This function returns sampled anchors and gt bboxes directly
    rather than computing regression targets.
Args:
        flat_approxs (Tensor): flat approxs of a single image,
            shape (approxs_per_octave * n, 4)
        inside_flags (Tensor): inside flags of a single image,
            shape (n, ).
        flat_squares (Tensor): flat squares of a single image,
            shape (n, 4)
gt_bboxes (Tensor): Ground truth bboxes of a single image.
img_meta (dict): Meta info of a single image.
approxs_per_octave (int): number of approxs per octave
cfg (dict): RPN train configs.
sampling (bool): sampling or not.
unmap_outputs (bool): unmap outputs or not.
Returns:
tuple
"""
if not inside_flags.any():
return (None, ) * 5
# assign gt and sample anchors
expand_inside_flags = inside_flags[:, None].expand(
-1, approxs_per_octave).reshape(-1)
approxs = flat_approxs[expand_inside_flags, :]
squares = flat_squares[inside_flags, :]
bbox_assigner = build_assigner(cfg.ga_assigner)
assign_result = bbox_assigner.assign(approxs, squares, approxs_per_octave,
gt_bboxes, gt_bboxes_ignore)
if sampling:
bbox_sampler = build_sampler(cfg.ga_sampler)
else:
bbox_sampler = PseudoSampler()
sampling_result = bbox_sampler.sample(assign_result, squares, gt_bboxes)
bbox_anchors = torch.zeros_like(squares)
bbox_gts = torch.zeros_like(squares)
bbox_weights = torch.zeros_like(squares)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
if len(pos_inds) > 0:
bbox_anchors[pos_inds, :] = sampling_result.pos_bboxes
bbox_gts[pos_inds, :] = sampling_result.pos_gt_bboxes
bbox_weights[pos_inds, :] = 1.0
# map up to original set of anchors
if unmap_outputs:
num_total_anchors = flat_squares.size(0)
bbox_anchors = unmap(bbox_anchors, num_total_anchors, inside_flags)
bbox_gts = unmap(bbox_gts, num_total_anchors, inside_flags)
bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
return (bbox_anchors, bbox_gts, bbox_weights, pos_inds, neg_inds)
import torch
class PointGenerator(object):
def _meshgrid(self, x, y, row_major=True):
xx = x.repeat(len(y))
yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
if row_major:
return xx, yy
else:
return yy, xx
def grid_points(self, featmap_size, stride=16, device='cuda'):
feat_h, feat_w = featmap_size
shift_x = torch.arange(0., feat_w, device=device) * stride
shift_y = torch.arange(0., feat_h, device=device) * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
stride = shift_x.new_full((shift_xx.shape[0], ), stride)
shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
all_points = shifts.to(device)
return all_points
def valid_flags(self, featmap_size, valid_size, device='cuda'):
feat_h, feat_w = featmap_size
valid_h, valid_w = valid_size
assert valid_h <= feat_h and valid_w <= feat_w
valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
valid_x[:valid_w] = 1
valid_y[:valid_h] = 1
valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
valid = valid_xx & valid_yy
return valid
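# Illustrative sketch (hypothetical sizes): points for a 2x3 feature map at
# stride 8; each row is (x, y, stride), laid out row-major like the anchors.
def _demo_point_generator():
    gen = PointGenerator()
    points = gen.grid_points((2, 3), stride=8, device='cpu')  # shape (6, 3)
    return points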
import torch
from ..bbox import PseudoSampler, assign_and_sample, build_assigner
from ..utils import multi_apply
def point_target(proposals_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
cfg,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
sampling=True,
unmap_outputs=True):
"""Compute corresponding GT box and classification targets for proposals.
Args:
        proposals_list (list[list]): Multi level points of each image.
valid_flag_list (list[list]): Multi level valid flags of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
cfg (dict): train sample configs.
Returns:
tuple
"""
num_imgs = len(img_metas)
assert len(proposals_list) == len(valid_flag_list) == num_imgs
# points number of multi levels
num_level_proposals = [points.size(0) for points in proposals_list[0]]
# concat all level points and flags to a single tensor
for i in range(num_imgs):
assert len(proposals_list[i]) == len(valid_flag_list[i])
proposals_list[i] = torch.cat(proposals_list[i])
valid_flag_list[i] = torch.cat(valid_flag_list[i])
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
if gt_labels_list is None:
gt_labels_list = [None for _ in range(num_imgs)]
(all_labels, all_label_weights, all_bbox_gt, all_proposals,
all_proposal_weights, pos_inds_list, neg_inds_list) = multi_apply(
point_target_single,
proposals_list,
valid_flag_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
gt_labels_list,
cfg=cfg,
label_channels=label_channels,
sampling=sampling,
unmap_outputs=unmap_outputs)
# no valid points
if any([labels is None for labels in all_labels]):
return None
# sampled points of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
labels_list = images_to_levels(all_labels, num_level_proposals)
label_weights_list = images_to_levels(all_label_weights,
num_level_proposals)
bbox_gt_list = images_to_levels(all_bbox_gt, num_level_proposals)
proposals_list = images_to_levels(all_proposals, num_level_proposals)
proposal_weights_list = images_to_levels(all_proposal_weights,
num_level_proposals)
return (labels_list, label_weights_list, bbox_gt_list, proposals_list,
proposal_weights_list, num_total_pos, num_total_neg)
def images_to_levels(target, num_level_grids):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
for n in num_level_grids:
end = start + n
level_targets.append(target[:, start:end].squeeze(0))
start = end
return level_targets
def point_target_single(flat_proposals,
valid_flags,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
cfg,
label_channels=1,
sampling=True,
unmap_outputs=True):
inside_flags = valid_flags
if not inside_flags.any():
return (None, ) * 7
# assign gt and sample proposals
proposals = flat_proposals[inside_flags, :]
if sampling:
assign_result, sampling_result = assign_and_sample(
proposals, gt_bboxes, gt_bboxes_ignore, None, cfg)
else:
bbox_assigner = build_assigner(cfg.assigner)
assign_result = bbox_assigner.assign(proposals, gt_bboxes,
gt_bboxes_ignore, gt_labels)
bbox_sampler = PseudoSampler()
sampling_result = bbox_sampler.sample(assign_result, proposals,
gt_bboxes)
num_valid_proposals = proposals.shape[0]
bbox_gt = proposals.new_zeros([num_valid_proposals, 4])
pos_proposals = torch.zeros_like(proposals)
proposals_weights = proposals.new_zeros([num_valid_proposals, 4])
labels = proposals.new_zeros(num_valid_proposals, dtype=torch.long)
label_weights = proposals.new_zeros(num_valid_proposals, dtype=torch.float)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
if len(pos_inds) > 0:
pos_gt_bboxes = sampling_result.pos_gt_bboxes
bbox_gt[pos_inds, :] = pos_gt_bboxes
pos_proposals[pos_inds, :] = proposals[pos_inds, :]
proposals_weights[pos_inds, :] = 1.0
if gt_labels is None:
labels[pos_inds] = 1
else:
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
if cfg.pos_weight <= 0:
label_weights[pos_inds] = 1.0
else:
label_weights[pos_inds] = cfg.pos_weight
if len(neg_inds) > 0:
label_weights[neg_inds] = 1.0
# map up to original set of proposals
if unmap_outputs:
num_total_proposals = flat_proposals.size(0)
labels = unmap(labels, num_total_proposals, inside_flags)
label_weights = unmap(label_weights, num_total_proposals, inside_flags)
bbox_gt = unmap(bbox_gt, num_total_proposals, inside_flags)
pos_proposals = unmap(pos_proposals, num_total_proposals, inside_flags)
proposals_weights = unmap(proposals_weights, num_total_proposals,
inside_flags)
return (labels, label_weights, bbox_gt, pos_proposals, proposals_weights,
pos_inds, neg_inds)
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
from .bbox_target import bbox_target
from .geometry import bbox_overlaps
from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip,
bbox_mapping, bbox_mapping_back, delta2bbox,
distance2bbox, roi2bbox)
from .assign_sampling import ( # isort:skip, avoid recursive imports
assign_and_sample, build_assigner, build_sampler)
__all__ = [
'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
'distance2bbox', 'bbox_target'
]
import mmcv
from . import assigners, samplers
def build_assigner(cfg, **kwargs):
if isinstance(cfg, assigners.BaseAssigner):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
else:
        raise TypeError('Invalid type {} for building an assigner'.format(
            type(cfg)))
def build_sampler(cfg, **kwargs):
if isinstance(cfg, samplers.BaseSampler):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
else:
raise TypeError('Invalid type {} for building a sampler'.format(
type(cfg)))
def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
bbox_assigner = build_assigner(cfg.assigner)
bbox_sampler = build_sampler(cfg.sampler)
assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
gt_labels)
sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
gt_labels)
return assign_result, sampling_result
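# Illustrative sketch (hypothetical values mirroring typical RCNN settings):
# the cfg consumed by assign_and_sample carries `assigner` and `sampler`
# dicts whose `type` field names a class in the corresponding module, e.g.
#
#     assigner=dict(type='MaxIoUAssigner', pos_iou_thr=0.5,
#                   neg_iou_thr=0.5, min_pos_iou=0.5),
#     sampler=dict(type='RandomSampler', num=512, pos_fraction=0.25,
#                  neg_pos_ub=-1, add_gt_as_proposals=True)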
from .approx_max_iou_assigner import ApproxMaxIoUAssigner
from .assign_result import AssignResult
from .atss_assigner import ATSSAssigner
from .base_assigner import BaseAssigner
from .max_iou_assigner import MaxIoUAssigner
from .point_assigner import PointAssigner
__all__ = [
'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
'PointAssigner', 'ATSSAssigner'
]
import torch
from ..geometry import bbox_overlaps
from .max_iou_assigner import MaxIoUAssigner
class ApproxMaxIoUAssigner(MaxIoUAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
    Each proposal will be assigned `-1`, `0`, or a positive integer
    indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
gpu_assign_thr (int): The upper bound of the number of GT for GPU
assign. When the number of gt is above this threshold, will assign
on CPU device. Negative values mean not assign on CPU.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
ignore_wrt_candidates=True,
gpu_assign_thr=-1):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
self.gpu_assign_thr = gpu_assign_thr
def assign(self,
approxs,
squares,
approxs_per_octave,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to approxs.
        This method assigns a gt bbox to each group of approxs (bboxes).
        Each group of approxs is represented by a base approx (bbox) and
        will be assigned -1, 0, or a positive number.
        -1 means don't care, 0 means negative sample, and a
        positive number is the index (1-based) of the assigned gt.
        The assignment is done in the following steps, and the order matters.
        1. assign every bbox to -1
        2. use the max IoU of each group of approxs as the IoU for its group
        3. assign proposals whose iou with all gts < neg_iou_thr to 0
        4. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        5. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself
Args:
approxs (Tensor): Bounding boxes to be assigned,
shape(approxs_per_octave*n, 4).
squares (Tensor): Base Bounding boxes to be assigned,
shape(n, 4).
approxs_per_octave (int): number of approxs per octave
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_squares = squares.size(0)
num_gts = gt_bboxes.size(0)
if num_squares == 0 or num_gts == 0:
# No predictions and/or truth, return empty assignment
overlaps = approxs.new(num_gts, num_squares)
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
# re-organize anchors by approxs_per_octave x num_squares
approxs = torch.transpose(
approxs.view(num_squares, approxs_per_octave, 4), 0,
1).contiguous().view(-1, 4)
assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
num_gts > self.gpu_assign_thr) else False
# compute overlap and assign gt on CPU when number of GT is large
if assign_on_cpu:
device = approxs.device
approxs = approxs.cpu()
gt_bboxes = gt_bboxes.cpu()
if gt_bboxes_ignore is not None:
gt_bboxes_ignore = gt_bboxes_ignore.cpu()
if gt_labels is not None:
gt_labels = gt_labels.cpu()
all_overlaps = bbox_overlaps(approxs, gt_bboxes)
overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,
num_gts).max(dim=0)
overlaps = torch.transpose(overlaps, 0, 1)
bboxes = squares[:, :4]
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
if assign_on_cpu:
assign_result.gt_inds = assign_result.gt_inds.to(device)
assign_result.max_overlaps = assign_result.max_overlaps.to(device)
if assign_result.labels is not None:
assign_result.labels = assign_result.labels.to(device)
return assign_result
import torch
from mmdet.utils import util_mixins
class AssignResult(util_mixins.NiceRepr):
"""
Stores assignments between predicted and truth boxes.
Attributes:
num_gts (int): the number of truth boxes considered when computing this
assignment
gt_inds (LongTensor): for each predicted box indicates the 1-based
index of the assigned truth box. 0 means unassigned and -1 means
ignore.
max_overlaps (FloatTensor): the iou between the predicted box and its
assigned truth box.
labels (None | LongTensor): If specified, for each predicted box
indicates the category label of the assigned truth box.
Example:
>>> # An assign result between 4 predicted boxes and 9 true boxes
>>> # where only two boxes were assigned.
>>> num_gts = 9
        >>> max_overlaps = torch.FloatTensor([0, .5, .9, 0])
>>> gt_inds = torch.LongTensor([-1, 1, 2, 0])
>>> labels = torch.LongTensor([0, 3, 4, 0])
>>> self = AssignResult(num_gts, gt_inds, max_overlaps, labels)
>>> print(str(self)) # xdoctest: +IGNORE_WANT
<AssignResult(num_gts=9, gt_inds.shape=(4,), max_overlaps.shape=(4,),
labels.shape=(4,))>
>>> # Force addition of gt labels (when adding gt as proposals)
>>> new_labels = torch.LongTensor([3, 4, 5])
>>> self.add_gt_(new_labels)
>>> print(str(self)) # xdoctest: +IGNORE_WANT
<AssignResult(num_gts=9, gt_inds.shape=(7,), max_overlaps.shape=(7,),
labels.shape=(7,))>
"""
def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
self.num_gts = num_gts
self.gt_inds = gt_inds
self.max_overlaps = max_overlaps
self.labels = labels
@property
def num_preds(self):
"""
Return the number of predictions in this assignment
"""
return len(self.gt_inds)
@property
def info(self):
"""
Returns a dictionary of info about the object
"""
return {
'num_gts': self.num_gts,
'num_preds': self.num_preds,
'gt_inds': self.gt_inds,
'max_overlaps': self.max_overlaps,
'labels': self.labels,
}
def __nice__(self):
"""
Create a "nice" summary string describing this assign result
"""
parts = []
parts.append('num_gts={!r}'.format(self.num_gts))
if self.gt_inds is None:
parts.append('gt_inds={!r}'.format(self.gt_inds))
else:
parts.append('gt_inds.shape={!r}'.format(
tuple(self.gt_inds.shape)))
if self.max_overlaps is None:
parts.append('max_overlaps={!r}'.format(self.max_overlaps))
else:
parts.append('max_overlaps.shape={!r}'.format(
tuple(self.max_overlaps.shape)))
if self.labels is None:
parts.append('labels={!r}'.format(self.labels))
else:
parts.append('labels.shape={!r}'.format(tuple(self.labels.shape)))
return ', '.join(parts)
@classmethod
def random(cls, **kwargs):
"""
Create random AssignResult for tests or debugging.
Kwargs:
num_preds: number of predicted boxes
num_gts: number of true boxes
            p_ignore (float): probability of a predicted box assigned to an
                ignored truth
p_assigned (float): probability of a predicted box not being
assigned
p_use_label (float | bool): with labels or not
rng (None | int | numpy.random.RandomState): seed or state
Returns:
AssignResult :
Example:
>>> from mmdet.core.bbox.assigners.assign_result import * # NOQA
>>> self = AssignResult.random()
>>> print(self.info)
"""
from mmdet.core.bbox import demodata
rng = demodata.ensure_rng(kwargs.get('rng', None))
num_gts = kwargs.get('num_gts', None)
num_preds = kwargs.get('num_preds', None)
p_ignore = kwargs.get('p_ignore', 0.3)
p_assigned = kwargs.get('p_assigned', 0.7)
p_use_label = kwargs.get('p_use_label', 0.5)
        num_classes = kwargs.get('num_classes', 3)
if num_gts is None:
num_gts = rng.randint(0, 8)
if num_preds is None:
num_preds = rng.randint(0, 16)
if num_gts == 0:
max_overlaps = torch.zeros(num_preds, dtype=torch.float32)
gt_inds = torch.zeros(num_preds, dtype=torch.int64)
            if p_use_label is True or rng.rand() < p_use_label:
labels = torch.zeros(num_preds, dtype=torch.int64)
else:
labels = None
else:
import numpy as np
# Create an overlap for each predicted box
max_overlaps = torch.from_numpy(rng.rand(num_preds))
# Construct gt_inds for each predicted box
is_assigned = torch.from_numpy(rng.rand(num_preds) < p_assigned)
            # constrain the maximum number of assignments
n_assigned = min(num_preds, min(num_gts, is_assigned.sum()))
assigned_idxs = np.where(is_assigned)[0]
rng.shuffle(assigned_idxs)
assigned_idxs = assigned_idxs[0:n_assigned]
assigned_idxs.sort()
is_assigned[:] = 0
is_assigned[assigned_idxs] = True
is_ignore = torch.from_numpy(
rng.rand(num_preds) < p_ignore) & is_assigned
gt_inds = torch.zeros(num_preds, dtype=torch.int64)
true_idxs = np.arange(num_gts)
rng.shuffle(true_idxs)
true_idxs = torch.from_numpy(true_idxs)
gt_inds[is_assigned] = true_idxs[:n_assigned]
gt_inds = torch.from_numpy(
rng.randint(1, num_gts + 1, size=num_preds))
gt_inds[is_ignore] = -1
gt_inds[~is_assigned] = 0
max_overlaps[~is_assigned] = 0
            if p_use_label is True or rng.rand() < p_use_label:
if num_classes == 0:
labels = torch.zeros(num_preds, dtype=torch.int64)
else:
labels = torch.from_numpy(
rng.randint(1, num_classes + 1, size=num_preds))
labels[~is_assigned] = 0
else:
labels = None
self = cls(num_gts, gt_inds, max_overlaps, labels)
return self
def add_gt_(self, gt_labels):
self_inds = torch.arange(
1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
self.gt_inds = torch.cat([self_inds, self.gt_inds])
self.max_overlaps = torch.cat(
[self.max_overlaps.new_ones(len(gt_labels)), self.max_overlaps])
if self.labels is not None:
self.labels = torch.cat([gt_labels, self.labels])
import torch
from ..geometry import bbox_overlaps
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class ATSSAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
    Each proposal will be assigned `0` or a positive integer
indicating the ground truth index.
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
        topk (int): number of bboxes selected on each level
"""
def __init__(self, topk):
self.topk = topk
# https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py
def assign(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to bboxes.
        The assignment is done in the following steps:
        1. compute iou between all bboxes (bboxes of all pyramid levels)
           and gts
        2. compute the center distance between all bboxes and gts
        3. on each pyramid level, for each gt, select k bboxes whose centers
           are closest to the gt center, so we select k*l bboxes in total as
           candidates for each gt
        4. get the corresponding iou for these candidates, and compute the
           mean and std, set mean + std as the iou threshold
        5. select candidates whose iou is greater than or equal to
           the threshold as positive
        6. restrict each positive sample's center to lie inside its gt
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
INF = 100000000
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# assign 0 by default
assigned_gt_inds = overlaps.new_full((num_bboxes, ),
0,
dtype=torch.long)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = overlaps.new_zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = overlaps.new_zeros((num_bboxes, ),
dtype=torch.long)
return AssignResult(
num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = torch.stack((gt_cx, gt_cy), dim=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1)
distances = (bboxes_points[:, None, :] -
gt_points[None, :, :]).pow(2).sum(-1).sqrt()
# Selecting candidates based on the center distance
candidate_idxs = []
start_idx = 0
        for level, bboxes_per_level in enumerate(num_level_bboxes):
            # on each pyramid level, for each gt,
            # select k bboxes whose centers are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            # guard against levels that contain fewer than topk bboxes
            selectable_k = min(self.topk, bboxes_per_level)
            _, topk_idxs_per_level = distances_per_level.topk(
                selectable_k, dim=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
candidate_idxs = torch.cat(candidate_idxs, dim=0)
        # get the corresponding iou for these candidates, and compute the
        # mean and std; set mean + std as the iou threshold
candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps.mean(0)
overlaps_std_per_gt = candidate_overlaps.std(0)
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
        # limit the positive samples' centers to lie inside the gt
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
ep_bboxes_cx = bboxes_cx.view(1, -1).expand(
num_gt, num_bboxes).contiguous().view(-1)
ep_bboxes_cy = bboxes_cy.view(1, -1).expand(
num_gt, num_bboxes).contiguous().view(-1)
candidate_idxs = candidate_idxs.view(-1)
# calculate the left, top, right, bottom distance between positive
# bbox center and gt side
l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0]
t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt)
b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt)
is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01
is_pos = is_pos & is_in_gts
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = torch.full_like(overlaps,
-INF).t().contiguous().view(-1)
index = candidate_idxs.view(-1)[is_pos.view(-1)]
overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index]
overlaps_inf = overlaps_inf.view(num_gt, -1).t()
max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1)
assigned_gt_inds[
max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
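# Worked sketch of the adaptive threshold in steps 4-5 above (illustrative
# numbers only):
#
#   candidate_overlaps = torch.tensor([0.12, 0.30, 0.55, 0.48, 0.05])
#   thr = candidate_overlaps.mean() + candidate_overlaps.std()
#   # mean = 0.30, std (unbiased) ~= 0.218, so thr ~= 0.518; only the 0.55
#   # candidate passes step 5, and it still has to survive the
#   # center-inside-gt check of step 6.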
from abc import ABCMeta, abstractmethod
class BaseAssigner(metaclass=ABCMeta):
@abstractmethod
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
pass
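# Interface sketch: a concrete assigner only needs to implement `assign` and
# return an AssignResult. The all-background assigner below is hypothetical
# and shown for the contract only:
#
#   class AllBackgroundAssigner(BaseAssigner):
#       def assign(self, bboxes, gt_bboxes,
#                  gt_bboxes_ignore=None, gt_labels=None):
#           num_bboxes = bboxes.size(0)
#           gt_inds = bboxes.new_zeros((num_bboxes, ), dtype=torch.long)
#           max_overlaps = bboxes.new_zeros((num_bboxes, ))
#           return AssignResult(
#               gt_bboxes.size(0), gt_inds, max_overlaps, labels=None)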
import torch
from ..geometry import bbox_overlaps
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class MaxIoUAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
    Each proposal will be assigned `-1`, `0`, or a positive integer
    indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
        gpu_assign_thr (int): The upper bound of the number of GTs for GPU
            assignment. When the number of gts is above this threshold, the
            assignment is done on CPU. Negative values mean never assigning
            on CPU.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
ignore_wrt_candidates=True,
gpu_assign_thr=-1):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
self.gpu_assign_thr = gpu_assign_thr
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to bboxes.
        This method assigns a gt bbox to every bbox (proposal/anchor); each
        bbox will be assigned -1, 0, or a positive number. -1 means don't
        care, 0 means negative sample, and a positive number is the index
        (1-based) of the assigned gt.
        The assignment is done in the following steps, and the order matters.
        1. assign every bbox to -1
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        4. for each gt bbox, assign its nearest proposals (there may be more
           than one) to itself
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
Example:
>>> self = MaxIoUAssigner(0.5, 0.5)
>>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])
>>> gt_bboxes = torch.Tensor([[0, 0, 10, 9]])
>>> assign_result = self.assign(bboxes, gt_bboxes)
>>> expected_gt_inds = torch.LongTensor([1, 0])
>>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
"""
        assign_on_cpu = (self.gpu_assign_thr > 0) and (
            gt_bboxes.shape[0] > self.gpu_assign_thr)
# compute overlap and assign gt on CPU when number of GT is large
if assign_on_cpu:
device = bboxes.device
bboxes = bboxes.cpu()
gt_bboxes = gt_bboxes.cpu()
if gt_bboxes_ignore is not None:
gt_bboxes_ignore = gt_bboxes_ignore.cpu()
if gt_labels is not None:
gt_labels = gt_labels.cpu()
bboxes = bboxes[:, :4]
overlaps = bbox_overlaps(gt_bboxes, bboxes)
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
if assign_on_cpu:
assign_result.gt_inds = assign_result.gt_inds.to(device)
assign_result.max_overlaps = assign_result.max_overlaps.to(device)
if assign_result.labels is not None:
assign_result.labels = assign_result.labels.to(device)
return assign_result
def assign_wrt_overlaps(self, overlaps, gt_labels=None):
"""Assign w.r.t. the overlaps of bboxes with gts.
Args:
overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
shape(k, n).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default
assigned_gt_inds = overlaps.new_full((num_bboxes, ),
-1,
dtype=torch.long)
if num_gts == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = overlaps.new_zeros((num_bboxes, ))
if num_gts == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = overlaps.new_zeros((num_bboxes, ),
dtype=torch.long)
return AssignResult(
num_gts,
assigned_gt_inds,
max_overlaps,
labels=assigned_labels)
# for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts
max_overlaps, argmax_overlaps = overlaps.max(dim=0)
# for each gt, which anchor best overlaps with it
# for each gt, the max iou of all proposals
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
        # 2. assign negative: below the negative IoU threshold
if isinstance(self.neg_iou_thr, float):
assigned_gt_inds[(max_overlaps >= 0)
& (max_overlaps < self.neg_iou_thr)] = 0
elif isinstance(self.neg_iou_thr, tuple):
assert len(self.neg_iou_thr) == 2
assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
& (max_overlaps < self.neg_iou_thr[1])] = 0
# 3. assign positive: above positive IoU threshold
pos_inds = max_overlaps >= self.pos_iou_thr
assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
# 4. assign fg: for each gt, proposals with highest IoU
for i in range(num_gts):
if gt_max_overlaps[i] >= self.min_pos_iou:
if self.gt_max_assign_all:
max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
assigned_gt_inds[max_iou_inds] = i + 1
else:
assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
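# Worked sketch of assign_wrt_overlaps (illustrative numbers only):
#
#   self = MaxIoUAssigner(pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.1)
#   # overlaps between 2 gts (rows) and 3 bboxes (columns)
#   overlaps = torch.Tensor([[0.6, 0.3, 0.0],
#                            [0.0, 0.1, 0.2]])
#   result = self.assign_wrt_overlaps(overlaps)
#   # bbox 0: iou 0.6 >= pos_iou_thr                 -> gt 1 (step 3)
#   # bbox 1: max iou 0.3 in [0, 0.5)                -> negative (step 2)
#   # bbox 2: best match of gt 2, 0.2 >= min_pos_iou -> rescued by step 4
#   assert result.gt_inds.tolist() == [1, 0, 2]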
import torch
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class PointAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each point.
    Each point will be assigned `0` or a positive integer
    indicating the ground truth index.
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt
    """
def __init__(self, scale=4, pos_num=3):
self.scale = scale
self.pos_num = pos_num
def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to points.
        This method assigns a gt bbox to every point; each point will be
        assigned 0 or a positive number. 0 means negative sample, and a
        positive number is the index (1-based) of the assigned gt.
        The assignment is done in the following steps, and the order matters.
        1. assign every point to 0
        2. a point is assigned to some gt bbox if
           (i) the point is within the k closest points to the gt bbox
           (ii) the distance between this point and the gt is smaller than
           the distance to any other gt bbox
Args:
            points (Tensor): points to be assigned, shape (n, 3), where the
                last dimension stands for (x, y, stride).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
NOTE: currently unused.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_points = points.shape[0]
num_gts = gt_bboxes.shape[0]
if num_gts == 0 or num_points == 0:
            # No ground truth or points, return empty assignment
assigned_gt_inds = points.new_full((num_points, ),
0,
dtype=torch.long)
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = points.new_zeros((num_points, ),
dtype=torch.long)
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
points_xy = points[:, :2]
points_stride = points[:, 2]
points_lvl = torch.log2(
points_stride).int() # [3...,4...,5...,6...,7...]
lvl_min, lvl_max = points_lvl.min(), points_lvl.max()
# assign gt box
gt_bboxes_xy = (gt_bboxes[:, :2] + gt_bboxes[:, 2:]) / 2
gt_bboxes_wh = (gt_bboxes[:, 2:] - gt_bboxes[:, :2]).clamp(min=1e-6)
scale = self.scale
gt_bboxes_lvl = ((torch.log2(gt_bboxes_wh[:, 0] / scale) +
torch.log2(gt_bboxes_wh[:, 1] / scale)) / 2).int()
gt_bboxes_lvl = torch.clamp(gt_bboxes_lvl, min=lvl_min, max=lvl_max)
# stores the assigned gt index of each point
assigned_gt_inds = points.new_zeros((num_points, ), dtype=torch.long)
# stores the assigned gt dist (to this point) of each point
assigned_gt_dist = points.new_full((num_points, ), float('inf'))
points_range = torch.arange(points.shape[0])
for idx in range(num_gts):
gt_lvl = gt_bboxes_lvl[idx]
# get the index of points in this level
lvl_idx = gt_lvl == points_lvl
points_index = points_range[lvl_idx]
# get the points in this level
lvl_points = points_xy[lvl_idx, :]
# get the center point of gt
gt_point = gt_bboxes_xy[[idx], :]
# get width and height of gt
gt_wh = gt_bboxes_wh[[idx], :]
# compute the distance between gt center and
# all points in this level
points_gt_dist = ((lvl_points - gt_point) / gt_wh).norm(dim=1)
            # find the nearest k points to the gt center on this level
            # (guard against levels that contain fewer than pos_num points)
            selectable_k = min(self.pos_num, len(points_gt_dist))
            min_dist, min_dist_index = torch.topk(
                points_gt_dist, selectable_k, largest=False)
# the index of nearest k points to gt center in this level
min_dist_points_index = points_index[min_dist_index]
            # The less_than_recorded_index stores the indices of min_dist
            # that are less than the assigned_gt_dist, where assigned_gt_dist
            # stores the dist from the previously assigned gt (if it exists)
            # to each point.
less_than_recorded_index = min_dist < assigned_gt_dist[
min_dist_points_index]
# The min_dist_points_index stores the index of points satisfy:
# (1) it is k nearest to current gt center in this level.
# (2) it is closer to current gt center than other gt center.
min_dist_points_index = min_dist_points_index[
less_than_recorded_index]
# assign the result
assigned_gt_inds[min_dist_points_index] = idx + 1
assigned_gt_dist[min_dist_points_index] = min_dist[
less_than_recorded_index]
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_points, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
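# Usage sketch for PointAssigner (illustrative numbers only):
#
#   self = PointAssigner(scale=4, pos_num=1)
#   # points are (x, y, stride); strides 8 and 16 map to levels 3 and 4
#   points = torch.Tensor([[4., 4., 8.], [12., 12., 8.], [16., 16., 16.]])
#   # a 32x32 gt maps to level (log2(32/4) + log2(32/4)) / 2 = 3
#   gt_bboxes = torch.Tensor([[0., 0., 32., 32.]])
#   result = self.assign(points, gt_bboxes)
#   # only the two level-3 points compete; the one nearest the gt center,
#   # (12, 12), wins
#   assert result.gt_inds.tolist() == [0, 1, 0]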