Commit 45af4242 authored by Kai Chen's avatar Kai Chen
Browse files

Merge branch 'dev' into single-stage

parents e8d16bf2 5686a375
from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
from .rpn_ops import *
from .bbox_ops import *
from .mask_ops import *
from .targets import *
from .losses import *
from .eval import *
from .parallel import *
from .post_processing import *
from .utils import *
from .anchor import * # noqa: F401, F403
from .bbox_ops import * # noqa: F401, F403
from .mask_ops import * # noqa: F401, F403
from .targets import * # noqa: F401, F403
from .losses import * # noqa: F401, F403
from .eval import * # noqa: F401, F403
from .parallel import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .anchor_generator import AnchorGenerator
from .anchor_target import anchor_target
__all__ = ['AnchorGenerator', 'anchor_target']
from .geometry import bbox_overlaps
from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
bbox_sampling, sample_positives, sample_negatives)
bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
sample_bboxes)
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target
__all__ = [
'bbox_overlaps', 'random_choice', 'bbox_assign',
'bbox_assign_wrt_overlaps', 'bbox_sampling', 'sample_positives',
'sample_negatives', 'bbox2delta', 'delta2bbox', 'bbox_flip',
'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
'bbox_target'
'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
'bbox2result', 'bbox_target'
]
......@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps,
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=.0):
"""Assign a corresponding gt bbox or background to each proposal/anchor
This function assign a gt bbox to every proposal, each proposals will be
"""Assign a corresponding gt bbox or background to each proposal/anchor.
This method assigns a gt bbox to every proposal; each proposal will be
assigned with -1, 0, or a positive number. -1 means don't care, 0 means
negative sample, positive number is the index (1-based) of assigned gt.
The assignment is done in following steps, the order matters:
1. assign every anchor to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox
4. for each gt bbox, assign its nearest proposals(may be more than one)
to itself
Args:
overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k)
gt_labels(Tensor, optional): shape (k, )
pos_iou_thr(float): iou threshold for positive bboxes
neg_iou_thr(float or tuple): iou threshold for negative bboxes
min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox,
for RPN, it is usually set as 0, for Fast R-CNN,
it is usually set as pos_iou_thr
overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
shape(n, k).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum IoU for a bbox to be considered as a
positive bbox. This argument only affects the 4th step.
Returns:
tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
max_overlaps), shape (n, )
"""
num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default
......@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps,
return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
"""Balance sampling for positive bboxes/anchors
def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
"""Balance sampling for positive bboxes/anchors.
1. calculate average positive num for each gt: num_per_gt
2. sample at most num_per_gt positives for each gt
3. random sampling from rest anchors if not enough fg
......@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
return sampled_inds
def sample_negatives(assigned_gt_inds,
def bbox_sampling_neg(assigned_gt_inds,
num_expected,
max_overlaps=None,
balance_thr=0,
hard_fraction=0.5):
"""Balance sampling for negative bboxes/anchors
negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr)
and easy(iou < balance_thr), around equal number of bg are sampled
from each set.
"""Balance sampling for negative bboxes/anchors.
Negative samples are split into 2 sets: hard (balance_thr <= iou <
neg_iou_thr) and easy (iou < balance_thr). The sampling ratio is controlled
by `hard_fraction`.
"""
neg_inds = torch.nonzero(assigned_gt_inds == 0)
if neg_inds.numel() != 0:
......@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds,
max_overlaps=None,
neg_balance_thr=0,
neg_hard_fraction=0.5):
"""Sample positive and negative bboxes given assigned results.
Args:
assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
num_expected (int): Expected total samples (pos and neg).
pos_fraction (float): Positive sample fraction.
neg_pos_ub (float): Negative/Positive upper bound.
pos_balance_sampling(bool): Whether to sample positive samples around
each gt bbox evenly.
max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts.
Used for negative balance sampling only.
neg_balance_thr (float, optional): IoU threshold for simple/hard
negative balance sampling.
neg_hard_fraction (float, optional): Fraction of hard negative samples
for negative balance sampling.
Returns:
tuple[Tensor]: positive bbox indices, negative bbox indices.
"""
num_expected_pos = int(num_expected * pos_fraction)
pos_inds = sample_positives(assigned_gt_inds, num_expected_pos,
pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
pos_balance_sampling)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel()
num_neg_max = int(
neg_pos_ub *
num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg,
neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
max_overlaps, neg_balance_thr,
neg_hard_fraction)
neg_inds = neg_inds.unique()
return pos_inds, neg_inds
def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Sample positive and negative bboxes.

    This is a simple implementation of bbox sampling given candidates and
    ground truth bboxes, which includes 3 steps.

    1. Assign gt to each bbox.
    2. Add gt bboxes to the sampling pool (optional).
    3. Perform positive and negative sampling.

    Args:
        bboxes (Tensor): Boxes to be sampled from.
        gt_bboxes (Tensor): Ground truth bboxes.
        gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
            `crowd` bboxes are considered as ignored.
        gt_labels (Tensor): Class labels of ground truth bboxes.
        cfg (dict): Sampling configs.

    Returns:
        tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
            pos_gt_bboxes, pos_gt_labels
    """
    # only the box coordinates are used; extra columns (e.g. scores) dropped
    bboxes = bboxes[:, :4]
    assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
        bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
                    cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
                    cfg.crowd_thr)
    if cfg.add_gt_as_proposals:
        bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
        # each gt is assigned to itself; assigned indices are 1-based
        gt_assign_self = torch.arange(
            1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
        assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
        assigned_labels = torch.cat([gt_labels, assigned_labels])
    pos_inds, neg_inds = bbox_sampling(
        assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
        cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
    pos_bboxes = bboxes[pos_inds]
    neg_bboxes = bboxes[neg_inds]
    # convert 1-based assigned gt indices to 0-based for indexing gt_bboxes
    pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
    pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
    pos_gt_labels = assigned_labels[pos_inds]
    return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
            pos_gt_labels)
import torch
import numpy as np
import mmcv
from .segms import polys_to_mask_wrt_box
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
                cfg):
    """Compute mask targets for positive proposals of a batch of images.

    Args:
        pos_proposals_list (list[Tensor]): Positive proposals of each image.
        pos_assigned_gt_inds_list (list[Tensor]): 0-based assigned gt index
            of each positive proposal, one tensor per image.
        gt_masks_list (list): Ground truth masks of each image.
        cfg: Config object with a `mask_size` attribute.

    Returns:
        Tensor: Mask targets of all images, concatenated along dim 0.
    """
    # the same cfg is shared by every image in the batch
    cfg_list = [cfg for _ in range(len(pos_proposals_list))]
    mask_targets = map(mask_target_single, pos_proposals_list,
                       pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
    mask_targets = torch.cat(list(mask_targets))
    return mask_targets
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
    """Compute mask targets for the positive proposals of a single image.

    Each target is the gt mask cropped to the proposal region and resized to
    ``cfg.mask_size`` x ``cfg.mask_size``.

    Args:
        pos_proposals (Tensor): Positive proposals, shape (n, 4).
        pos_assigned_gt_inds (Tensor): 0-based assigned gt index of each
            positive proposal, shape (n, ).
        gt_masks: Indexable collection of ground truth masks; assumed to be
            binary maps at image resolution — TODO confirm with callers.
        cfg: Config object with a `mask_size` attribute.

    Returns:
        Tensor: Float mask targets of shape (n, mask_size, mask_size).
    """
    mask_size = cfg.mask_size
    num_pos = pos_proposals.size(0)
    mask_targets = []
    if num_pos > 0:
        proposals_np = pos_proposals.cpu().numpy()
        pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
        for i in range(num_pos):
            gt_mask = gt_masks[pos_assigned_gt_inds[i]]
            bbox = proposals_np[i, :].astype(np.int32)
            x1, y1, x2, y2 = bbox
            # clamp to at least 1 pixel to avoid empty crops
            w = np.maximum(x2 - x1 + 1, 1)
            h = np.maximum(y2 - y1 + 1, 1)
            # mask is uint8 both before and after resizing
            target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
                                   (mask_size, mask_size))
            mask_targets.append(target)
        mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
            pos_proposals.device)
    else:
        # no positives: return an empty tensor on the same device/dtype
        mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
    return mask_targets
# flake8: noqa
# This file is copied from Detectron.
# Copyright (c) 2017-present, Facebook, Inc.
......
from .anchor_generator import *
from .anchor_target import *
from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook,
DistSamplerSeedHook)
from .hooks import EmptyCacheHook
from .misc import tensor2imgs, unmap, multi_apply
__all__ = [
'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook',
'EmptyCacheHook', 'tensor2imgs', 'unmap', 'multi_apply'
'tensor2imgs', 'unmap', 'multi_apply'
]
......@@ -38,7 +38,8 @@ def _init_dist_slurm(backend, **kwargs):
raise NotImplementedError
# modified from https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9
# modified from
# https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9
def all_reduce_coalesced(tensors):
buckets = OrderedDict()
for tensor in tensors:
......
import torch
from mmcv.runner import Hook
class EmptyCacheHook(Hook):
    """Hook that releases cached CUDA memory before and after each epoch."""

    def before_epoch(self, runner):
        # return unused cached blocks held by the CUDA caching allocator
        torch.cuda.empty_cache()

    def after_epoch(self, runner):
        torch.cuda.empty_cache()
from .coco import CocoDataset
from .loader import (collate, GroupSampler, DistributedGroupSampler,
build_dataloader)
from .utils import DataContainer, to_tensor, random_scale, show_ann
__all__ = ['CocoDataset']
__all__ = [
'CocoDataset', 'collate', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'DataContainer', 'to_tensor', 'random_scale',
'show_ann'
]
......@@ -5,71 +5,12 @@ import numpy as np
from pycocotools.coco import COCO
from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform,
from .transforms import (ImageTransform, BboxTransform, MaskTransform,
Numpy2Tensor)
from .utils import to_tensor, show_ann, random_scale
from .utils import DataContainer as DC
def parse_ann_info(ann_info, cat2label, with_mask=True):
    """Parse bbox and mask annotation.

    Crowd annotations are collected separately as ignored bboxes; degenerate
    boxes (non-positive area or side < 1) are skipped entirely.

    Args:
        ann_info (list[dict]): Annotation info of an image.
        cat2label (dict): The mapping from category ids to labels.
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: Keys ``bboxes``, ``labels``, ``bboxes_ignore`` and, when
            ``with_mask`` is True, ``mask_polys`` and ``poly_lens``.
    """
    boxes = []
    labels = []
    boxes_ignore = []
    if with_mask:
        # each mask is a list of polys; each poly is a flat list of floats
        all_mask_polys = []
        all_poly_lens = []
    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        box = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            boxes_ignore.append(box)
            continue
        boxes.append(box)
        labels.append(cat2label[ann['category_id']])
        if with_mask:
            # valid polygons have >= 3 points (6 coordinates); polys are
            # not resized here
            valid_polys = [p for p in ann['segmentation'] if len(p) >= 6]
            all_mask_polys.append(valid_polys)
            all_poly_lens.extend(len(p) for p in valid_polys)
    if boxes:
        gt_bboxes = np.array(boxes, dtype=np.float32)
        gt_labels = np.array(labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)
    if boxes_ignore:
        gt_bboxes_ignore = np.array(boxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
    ann = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
    if with_mask:
        ann['mask_polys'] = all_mask_polys
        ann['poly_lens'] = all_poly_lens
    return ann
class CocoDataset(Dataset):
def __init__(self,
......@@ -138,7 +79,7 @@ class CocoDataset(Dataset):
self.img_transform = ImageTransform(
size_divisor=self.size_divisor, **self.img_norm_cfg)
self.bbox_transform = BboxTransform()
self.mask_transform = PolyMaskTransform()
self.mask_transform = MaskTransform()
self.numpy2tensor = Numpy2Tensor()
def __len__(self):
......@@ -162,6 +103,70 @@ class CocoDataset(Dataset):
ann_info = self.coco.loadAnns(ann_ids)
return ann_info
def _parse_ann_info(self, ann_info, with_mask=True):
    """Parse bbox and mask annotation.

    Args:
        ann_info (list[dict]): Annotation info of an image.
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: A dict containing the following keys: bboxes, bboxes_ignore,
            labels, masks, mask_polys, poly_lens.
    """
    gt_bboxes = []
    gt_labels = []
    gt_bboxes_ignore = []
    # Two formats are provided.
    # 1. mask: a binary map of the same size of the image.
    # 2. polys: each mask consists of one or several polys, each poly is a
    #    list of float.
    if with_mask:
        gt_masks = []
        gt_mask_polys = []
        gt_poly_lens = []
    for i, ann in enumerate(ann_info):
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        # skip degenerate boxes (non-positive area or side < 1 pixel)
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            # crowd regions are kept separately and treated as ignored
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_labels.append(self.cat2label[ann['category_id']])
            if with_mask:
                # decode the annotation into a binary map at image size
                gt_masks.append(self.coco.annToMask(ann))
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_labels = np.array(gt_labels, dtype=np.int64)
    else:
        # no valid annotations: return empty arrays with consistent shapes
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)
    if gt_bboxes_ignore:
        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
    ann = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
    if with_mask:
        ann['masks'] = gt_masks
        # poly format is not used in the current implementation
        ann['mask_polys'] = gt_mask_polys
        ann['poly_lens'] = gt_poly_lens
    return ann
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
......@@ -200,7 +205,7 @@ class CocoDataset(Dataset):
idx = self._rand_another(idx)
continue
ann = parse_ann_info(ann_info, self.cat2label, self.with_mask)
ann = self._parse_ann_info(ann_info, self.with_mask)
gt_bboxes = ann['bboxes']
gt_labels = ann['labels']
gt_bboxes_ignore = ann['bboxes_ignore']
......@@ -223,10 +228,8 @@ class CocoDataset(Dataset):
scale_factor, flip)
if self.with_mask:
gt_mask_polys, gt_poly_lens, num_polys_per_mask = \
self.mask_transform(
ann['mask_polys'], ann['poly_lens'],
img_info['height'], img_info['width'], flip)
gt_masks = self.mask_transform(ann['masks'], pad_shape,
scale_factor, flip)
ori_shape = (img_info['height'], img_info['width'], 3)
img_meta = dict(
......@@ -247,10 +250,7 @@ class CocoDataset(Dataset):
if self.with_crowd:
data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
if self.with_mask:
data['gt_masks'] = dict(
polys=DC(gt_mask_polys, cpu_only=True),
poly_lens=DC(gt_poly_lens, cpu_only=True),
polys_per_mask=DC(num_polys_per_mask, cpu_only=True))
data['gt_masks'] = DC(gt_masks, cpu_only=True)
return data
def prepare_test_img(self, idx):
......
......@@ -10,7 +10,8 @@ __all__ = [
class ImageTransform(object):
"""Preprocess an image
"""Preprocess an image.
1. rescale the image to expected size
2. normalize the image
3. flip the image (if needed)
......@@ -59,7 +60,8 @@ def bbox_flip(bboxes, img_shape):
class BboxTransform(object):
"""Preprocess gt bboxes
"""Preprocess gt bboxes.
1. rescale bboxes according to image size
2. flip bboxes (if needed)
3. pad the first dimension to `max_num_gts`
......@@ -84,17 +86,12 @@ class BboxTransform(object):
class PolyMaskTransform(object):
"""Preprocess polygons."""
def __init__(self):
pass
def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False):
"""
Args:
gt_mask_polys(list): a list of masks, each mask is a list of polys,
each poly is a list of numbers
gt_poly_lens(list): a list of int, indicating the size of each poly
"""
if flip:
gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w)
num_polys_per_mask = np.array(
......@@ -108,6 +105,28 @@ class PolyMaskTransform(object):
return gt_mask_polys, gt_poly_lens, num_polys_per_mask
class MaskTransform(object):
    """Preprocess binary masks.

    Each mask is rescaled to the expected size (nearest-neighbor),
    horizontally flipped if needed, padded to ``pad_shape`` and finally
    stacked with the others into a single array.
    """

    def __call__(self, masks, pad_shape, scale_factor, flip=False):
        processed = []
        for mask in masks:
            rescaled = mmcv.imrescale(
                mask, scale_factor, interpolation='nearest')
            if flip:
                rescaled = rescaled[:, ::-1]
            processed.append(mmcv.impad(rescaled, pad_shape[:2], pad_val=0))
        return np.stack(processed, axis=0)
class Numpy2Tensor(object):
def __init__(self):
......
from .data_container import DataContainer
from .misc import *
from .misc import to_tensor, random_scale, show_ann
__all__ = ['DataContainer', 'to_tensor', 'random_scale', 'show_ann']
from .detectors import *
from .builder import *
from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN
from .builder import (build_neck, build_rpn_head, build_roi_extractor,
build_bbox_head, build_mask_head, build_detector)
__all__ = [
'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone',
'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head',
'build_mask_head', 'build_detector'
]
from .resnet import resnet
__all__ = ['resnet']
......@@ -43,16 +43,20 @@ class ConvFCRoIHead(BBoxHead):
self.fc_out_channels = fc_out_channels
# add shared convs and fcs
self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch(
self.num_shared_convs, self.num_shared_fcs, self.in_channels, True)
self.shared_convs, self.shared_fcs, last_layer_dim = \
self._add_conv_fc_branch(
self.num_shared_convs, self.num_shared_fcs, self.in_channels,
True)
self.shared_out_channels = last_layer_dim
# add cls specific branch
self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch(
self.cls_convs, self.cls_fcs, self.cls_last_dim = \
self._add_conv_fc_branch(
self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)
# add reg specific branch
self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch(
self.reg_convs, self.reg_fcs, self.reg_last_dim = \
self._add_conv_fc_branch(
self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)
if self.num_shared_fcs == 0 and not self.with_avg_pool:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment