Commit 45af4242 authored by Kai Chen's avatar Kai Chen
Browse files

Merge branch 'dev' into single-stage

parents e8d16bf2 5686a375
from .version import __version__, short_version from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
from .rpn_ops import * from .anchor import * # noqa: F401, F403
from .bbox_ops import * from .bbox_ops import * # noqa: F401, F403
from .mask_ops import * from .mask_ops import * # noqa: F401, F403
from .targets import * from .targets import * # noqa: F401, F403
from .losses import * from .losses import * # noqa: F401, F403
from .eval import * from .eval import * # noqa: F401, F403
from .parallel import * from .parallel import * # noqa: F401, F403
from .post_processing import * from .post_processing import * # noqa: F401, F403
from .utils import * from .utils import * # noqa: F401, F403
from .anchor_generator import AnchorGenerator
from .anchor_target import anchor_target
__all__ = ['AnchorGenerator', 'anchor_target']
from .geometry import bbox_overlaps from .geometry import bbox_overlaps
from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps, from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
bbox_sampling, sample_positives, sample_negatives) bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
sample_bboxes)
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target from .bbox_target import bbox_target
__all__ = [ __all__ = [
'bbox_overlaps', 'random_choice', 'bbox_assign', 'bbox_overlaps', 'random_choice', 'bbox_assign',
'bbox_assign_wrt_overlaps', 'bbox_sampling', 'sample_positives', 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
'sample_negatives', 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
'bbox_target' 'bbox2result', 'bbox_target'
] ]
...@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps, ...@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps,
pos_iou_thr=0.5, pos_iou_thr=0.5,
neg_iou_thr=0.5, neg_iou_thr=0.5,
min_pos_iou=.0): min_pos_iou=.0):
"""Assign a corresponding gt bbox or background to each proposal/anchor """Assign a corresponding gt bbox or background to each proposal/anchor.
This function assign a gt bbox to every proposal, each proposals will be
This method assign a gt bbox to every proposal, each proposals will be
assigned with -1, 0, or a positive number. -1 means don't care, 0 means assigned with -1, 0, or a positive number. -1 means don't care, 0 means
negative sample, positive number is the index (1-based) of assigned gt. negative sample, positive number is the index (1-based) of assigned gt.
The assignment is done in following steps, the order matters: The assignment is done in following steps, the order matters:
1. assign every anchor to -1 1. assign every anchor to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0 2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each anchor, if the iou with its nearest gt >= pos_iou_thr, 3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox assign it to that bbox
4. for each gt bbox, assign its nearest proposals(may be more than one) 4. for each gt bbox, assign its nearest proposals(may be more than one)
to itself to itself
Args: Args:
overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k) overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
gt_labels(Tensor, optional): shape (k, ) shape(n, k).
pos_iou_thr(float): iou threshold for positive bboxes gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
neg_iou_thr(float or tuple): iou threshold for negative bboxes pos_iou_thr (float): IoU threshold for positive bboxes.
min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
for RPN, it is usually set as 0, for Fast R-CNN, min_pos_iou (float): Minimum IoU for a bbox to be considered as a
it is usually set as pos_iou_thr positive bbox. This argument only affects the 4th step.
Returns: Returns:
tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
max_overlaps), shape (n, )
""" """
num_bboxes, num_gts = overlaps.size(0), overlaps.size(1) num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default # 1. assign -1 by default
...@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps, ...@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps,
return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
"""Balance sampling for positive bboxes/anchors """Balance sampling for positive bboxes/anchors.
1. calculate average positive num for each gt: num_per_gt 1. calculate average positive num for each gt: num_per_gt
2. sample at most num_per_gt positives for each gt 2. sample at most num_per_gt positives for each gt
3. random sampling from rest anchors if not enough fg 3. random sampling from rest anchors if not enough fg
...@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): ...@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
return sampled_inds return sampled_inds
def sample_negatives(assigned_gt_inds, def bbox_sampling_neg(assigned_gt_inds,
num_expected, num_expected,
max_overlaps=None, max_overlaps=None,
balance_thr=0, balance_thr=0,
hard_fraction=0.5): hard_fraction=0.5):
"""Balance sampling for negative bboxes/anchors """Balance sampling for negative bboxes/anchors.
negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr)
and easy(iou < balance_thr), around equal number of bg are sampled Negative samples are split into 2 set: hard (balance_thr <= iou <
from each set. neg_iou_thr) and easy(iou < balance_thr). The sampling ratio is controlled
by `hard_fraction`.
""" """
neg_inds = torch.nonzero(assigned_gt_inds == 0) neg_inds = torch.nonzero(assigned_gt_inds == 0)
if neg_inds.numel() != 0: if neg_inds.numel() != 0:
...@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds, ...@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds,
max_overlaps=None, max_overlaps=None,
neg_balance_thr=0, neg_balance_thr=0,
neg_hard_fraction=0.5): neg_hard_fraction=0.5):
"""Sample positive and negative bboxes given assigned results.
Args:
assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
num_expected (int): Expected total samples (pos and neg).
pos_fraction (float): Positive sample fraction.
neg_pos_ub (float): Negative/Positive upper bound.
pos_balance_sampling(bool): Whether to sample positive samples around
each gt bbox evenly.
max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts.
Used for negative balance sampling only.
neg_balance_thr (float, optional): IoU threshold for simple/hard
negative balance sampling.
neg_hard_fraction (float, optional): Fraction of hard negative samples
for negative balance sampling.
Returns:
tuple[Tensor]: positive bbox indices, negative bbox indices.
"""
num_expected_pos = int(num_expected * pos_fraction) num_expected_pos = int(num_expected * pos_fraction)
pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
pos_balance_sampling) pos_balance_sampling)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds = pos_inds.unique() pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel() num_sampled_pos = pos_inds.numel()
num_neg_max = int( num_neg_max = int(
neg_pos_ub * neg_pos_ub *
num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
max_overlaps, neg_balance_thr, max_overlaps, neg_balance_thr,
neg_hard_fraction) neg_hard_fraction)
neg_inds = neg_inds.unique() neg_inds = neg_inds.unique()
return pos_inds, neg_inds return pos_inds, neg_inds
def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Sample positive and negative bboxes.

    This is a simple implementation of bbox sampling given candidates and
    ground truth bboxes, which includes 3 steps.

    1. Assign gt to each bbox.
    2. Add gt bboxes to the sampling pool (optional).
    3. Perform positive and negative sampling.

    Args:
        bboxes (Tensor): Boxes to be sampled from.
        gt_bboxes (Tensor): Ground truth bboxes.
        gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
            `crowd` bboxes are considered as ignored.
        gt_labels (Tensor): Class labels of ground truth bboxes.
        cfg (dict): Sampling configs.

    Returns:
        tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
            pos_gt_bboxes, pos_gt_labels
    """
    # keep only the 4 coordinate columns; proposals may carry extra columns
    # (e.g. a score) — presumably (x1, y1, x2, y2), TODO confirm with caller
    bboxes = bboxes[:, :4]
    assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
        bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
                    cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
                    cfg.crowd_thr)
    if cfg.add_gt_as_proposals:
        # prepend gt bboxes so they are part of the sampling pool
        bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
        # each gt is assigned to itself; assigned indices are 1-based
        # (0 means negative, -1 means ignore)
        gt_assign_self = torch.arange(
            1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
        assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
        assigned_labels = torch.cat([gt_labels, assigned_labels])
        # NOTE(review): `max_overlaps` is NOT extended here, so its indices
        # are shifted by len(gt_labels) relative to `assigned_gt_inds` when
        # negative balance sampling uses it inside bbox_sampling — verify.
    pos_inds, neg_inds = bbox_sampling(
        assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
        cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
    pos_bboxes = bboxes[pos_inds]
    neg_bboxes = bboxes[neg_inds]
    # convert 1-based assigned gt indices to 0-based for indexing gt_bboxes
    pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
    pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
    pos_gt_labels = assigned_labels[pos_inds]
    return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
            pos_gt_labels)
import torch import torch
import numpy as np import numpy as np
import mmcv
from .segms import polys_to_mask_wrt_box
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
def mask_target(pos_proposals_list,
pos_assigned_gt_inds_list,
gt_polys_list,
img_meta,
cfg): cfg):
cfg_list = [cfg for _ in range(len(pos_proposals_list))] cfg_list = [cfg for _ in range(len(pos_proposals_list))]
mask_targets = map(mask_target_single, pos_proposals_list, mask_targets = map(mask_target_single, pos_proposals_list,
pos_assigned_gt_inds_list, gt_polys_list, img_meta, pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
cfg_list) mask_targets = torch.cat(list(mask_targets))
mask_targets = torch.cat(tuple(mask_targets), dim=0)
return mask_targets return mask_targets
def mask_target_single(pos_proposals, def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
pos_assigned_gt_inds,
gt_polys,
img_meta,
cfg):
mask_size = cfg.mask_size mask_size = cfg.mask_size
num_pos = pos_proposals.size(0) num_pos = pos_proposals.size(0)
mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) mask_targets = []
if num_pos > 0: if num_pos > 0:
pos_proposals = pos_proposals.cpu().numpy() proposals_np = pos_proposals.cpu().numpy()
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
scale_factor = img_meta['scale_factor']
for i in range(num_pos): for i in range(num_pos):
bbox = pos_proposals[i, :] / scale_factor gt_mask = gt_masks[pos_assigned_gt_inds[i]]
polys = gt_polys[pos_assigned_gt_inds[i]] bbox = proposals_np[i, :].astype(np.int32)
mask = polys_to_mask_wrt_box(polys, bbox, mask_size) x1, y1, x2, y2 = bbox
mask = np.array(mask > 0, dtype=np.float32) w = np.maximum(x2 - x1 + 1, 1)
mask_targets[i, ...] = torch.from_numpy(mask).to( h = np.maximum(y2 - y1 + 1, 1)
mask_targets.device) # mask is uint8 both before and after resizing
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
(mask_size, mask_size))
mask_targets.append(target)
mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
pos_proposals.device)
else:
mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
return mask_targets return mask_targets
# flake8: noqa
# This file is copied from Detectron. # This file is copied from Detectron.
# Copyright (c) 2017-present, Facebook, Inc. # Copyright (c) 2017-present, Facebook, Inc.
......
from .anchor_generator import *
from .anchor_target import *
from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook, from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook,
DistSamplerSeedHook) DistSamplerSeedHook)
from .hooks import EmptyCacheHook
from .misc import tensor2imgs, unmap, multi_apply from .misc import tensor2imgs, unmap, multi_apply
__all__ = [ __all__ = [
'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook', 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook',
'EmptyCacheHook', 'tensor2imgs', 'unmap', 'multi_apply' 'tensor2imgs', 'unmap', 'multi_apply'
] ]
...@@ -38,7 +38,8 @@ def _init_dist_slurm(backend, **kwargs): ...@@ -38,7 +38,8 @@ def _init_dist_slurm(backend, **kwargs):
raise NotImplementedError raise NotImplementedError
# modified from https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 # modified from
# https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9
def all_reduce_coalesced(tensors): def all_reduce_coalesced(tensors):
buckets = OrderedDict() buckets = OrderedDict()
for tensor in tensors: for tensor in tensors:
......
import torch
from mmcv.runner import Hook
class EmptyCacheHook(Hook):
    """Hook that releases the CUDA caching allocator's unused memory.

    ``torch.cuda.empty_cache()`` returns cached blocks to the driver so
    other processes can use them; it does not free tensors that are still
    referenced. The hook runs both before and after every epoch.
    """

    def before_epoch(self, runner):
        # `runner` is unused; the hook only clears the CUDA cache.
        torch.cuda.empty_cache()

    def after_epoch(self, runner):
        torch.cuda.empty_cache()
from .coco import CocoDataset from .coco import CocoDataset
from .loader import (collate, GroupSampler, DistributedGroupSampler,
build_dataloader)
from .utils import DataContainer, to_tensor, random_scale, show_ann
__all__ = ['CocoDataset'] __all__ = [
'CocoDataset', 'collate', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'DataContainer', 'to_tensor', 'random_scale',
'show_ann'
]
...@@ -5,71 +5,12 @@ import numpy as np ...@@ -5,71 +5,12 @@ import numpy as np
from pycocotools.coco import COCO from pycocotools.coco import COCO
from torch.utils.data import Dataset from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform, from .transforms import (ImageTransform, BboxTransform, MaskTransform,
Numpy2Tensor) Numpy2Tensor)
from .utils import to_tensor, show_ann, random_scale from .utils import to_tensor, show_ann, random_scale
from .utils import DataContainer as DC from .utils import DataContainer as DC
def parse_ann_info(ann_info, cat2label, with_mask=True):
    """Parse bbox and mask annotation.

    Args:
        ann_info (list[dict]): Annotation info of an image (COCO style).
        cat2label (dict): The mapping from category ids to labels.
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: A dict with keys ``bboxes``, ``labels``, ``bboxes_ignore``
            and, if `with_mask` is True, ``mask_polys`` and ``poly_lens``.
    """
    gt_bboxes = []
    gt_labels = []
    gt_bboxes_ignore = []
    # each mask consists of one or several polys, each poly is a list of float
    if with_mask:
        gt_mask_polys = []
        gt_poly_lens = []
    # plain iteration: the enumerate index was never used
    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        # skip degenerate boxes (empty area or less than 1 px wide/tall)
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        # COCO bboxes are [x, y, w, h]; convert to inclusive [x1, y1, x2, y2]
        bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            # crowd boxes are kept separately and treated as ignore regions
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_labels.append(cat2label[ann['category_id']])
            if with_mask:
                # Note polys are not resized
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_labels = np.array(gt_labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)
    if gt_bboxes_ignore:
        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
    # use a fresh name instead of shadowing the loop variable `ann`
    parsed = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
    if with_mask:
        parsed['mask_polys'] = gt_mask_polys
        parsed['poly_lens'] = gt_poly_lens
    return parsed
class CocoDataset(Dataset): class CocoDataset(Dataset):
def __init__(self, def __init__(self,
...@@ -138,7 +79,7 @@ class CocoDataset(Dataset): ...@@ -138,7 +79,7 @@ class CocoDataset(Dataset):
self.img_transform = ImageTransform( self.img_transform = ImageTransform(
size_divisor=self.size_divisor, **self.img_norm_cfg) size_divisor=self.size_divisor, **self.img_norm_cfg)
self.bbox_transform = BboxTransform() self.bbox_transform = BboxTransform()
self.mask_transform = PolyMaskTransform() self.mask_transform = MaskTransform()
self.numpy2tensor = Numpy2Tensor() self.numpy2tensor = Numpy2Tensor()
def __len__(self): def __len__(self):
...@@ -162,6 +103,70 @@ class CocoDataset(Dataset): ...@@ -162,6 +103,70 @@ class CocoDataset(Dataset):
ann_info = self.coco.loadAnns(ann_ids) ann_info = self.coco.loadAnns(ann_ids)
return ann_info return ann_info
def _parse_ann_info(self, ann_info, with_mask=True):
    """Parse bbox and mask annotation.

    Args:
        ann_info (list[dict]): Annotation info of an image (COCO style).
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: A dict containing the following keys: bboxes, bboxes_ignore,
            labels and, if `with_mask` is True, masks, mask_polys, poly_lens.
    """
    gt_bboxes = []
    gt_labels = []
    gt_bboxes_ignore = []
    # Two formats are provided.
    # 1. mask: a binary map of the same size of the image.
    # 2. polys: each mask consists of one or several polys, each poly is a
    #    list of float.
    if with_mask:
        gt_masks = []
        gt_mask_polys = []
        gt_poly_lens = []
    # plain iteration: the enumerate index was never used
    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        # skip degenerate boxes (empty area or less than 1 px wide/tall)
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        # COCO bboxes are [x, y, w, h]; convert to inclusive [x1, y1, x2, y2]
        bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            # crowd boxes are kept separately and treated as ignore regions
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_labels.append(self.cat2label[ann['category_id']])
            if with_mask:
                gt_masks.append(self.coco.annToMask(ann))
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_labels = np.array(gt_labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)
    if gt_bboxes_ignore:
        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
    # use a fresh name instead of shadowing the loop variable `ann`
    parsed = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
    if with_mask:
        parsed['masks'] = gt_masks
        # poly format is not used in the current implementation
        parsed['mask_polys'] = gt_mask_polys
        parsed['poly_lens'] = gt_poly_lens
    return parsed
def _set_group_flag(self): def _set_group_flag(self):
"""Set flag according to image aspect ratio. """Set flag according to image aspect ratio.
...@@ -200,7 +205,7 @@ class CocoDataset(Dataset): ...@@ -200,7 +205,7 @@ class CocoDataset(Dataset):
idx = self._rand_another(idx) idx = self._rand_another(idx)
continue continue
ann = parse_ann_info(ann_info, self.cat2label, self.with_mask) ann = self._parse_ann_info(ann_info, self.with_mask)
gt_bboxes = ann['bboxes'] gt_bboxes = ann['bboxes']
gt_labels = ann['labels'] gt_labels = ann['labels']
gt_bboxes_ignore = ann['bboxes_ignore'] gt_bboxes_ignore = ann['bboxes_ignore']
...@@ -223,10 +228,8 @@ class CocoDataset(Dataset): ...@@ -223,10 +228,8 @@ class CocoDataset(Dataset):
scale_factor, flip) scale_factor, flip)
if self.with_mask: if self.with_mask:
gt_mask_polys, gt_poly_lens, num_polys_per_mask = \ gt_masks = self.mask_transform(ann['masks'], pad_shape,
self.mask_transform( scale_factor, flip)
ann['mask_polys'], ann['poly_lens'],
img_info['height'], img_info['width'], flip)
ori_shape = (img_info['height'], img_info['width'], 3) ori_shape = (img_info['height'], img_info['width'], 3)
img_meta = dict( img_meta = dict(
...@@ -247,10 +250,7 @@ class CocoDataset(Dataset): ...@@ -247,10 +250,7 @@ class CocoDataset(Dataset):
if self.with_crowd: if self.with_crowd:
data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
if self.with_mask: if self.with_mask:
data['gt_masks'] = dict( data['gt_masks'] = DC(gt_masks, cpu_only=True)
polys=DC(gt_mask_polys, cpu_only=True),
poly_lens=DC(gt_poly_lens, cpu_only=True),
polys_per_mask=DC(num_polys_per_mask, cpu_only=True))
return data return data
def prepare_test_img(self, idx): def prepare_test_img(self, idx):
......
...@@ -10,7 +10,8 @@ __all__ = [ ...@@ -10,7 +10,8 @@ __all__ = [
class ImageTransform(object): class ImageTransform(object):
"""Preprocess an image """Preprocess an image.
1. rescale the image to expected size 1. rescale the image to expected size
2. normalize the image 2. normalize the image
3. flip the image (if needed) 3. flip the image (if needed)
...@@ -59,7 +60,8 @@ def bbox_flip(bboxes, img_shape): ...@@ -59,7 +60,8 @@ def bbox_flip(bboxes, img_shape):
class BboxTransform(object): class BboxTransform(object):
"""Preprocess gt bboxes """Preprocess gt bboxes.
1. rescale bboxes according to image size 1. rescale bboxes according to image size
2. flip bboxes (if needed) 2. flip bboxes (if needed)
3. pad the first dimension to `max_num_gts` 3. pad the first dimension to `max_num_gts`
...@@ -84,17 +86,12 @@ class BboxTransform(object): ...@@ -84,17 +86,12 @@ class BboxTransform(object):
class PolyMaskTransform(object): class PolyMaskTransform(object):
"""Preprocess polygons."""
def __init__(self): def __init__(self):
pass pass
def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False):
"""
Args:
gt_mask_polys(list): a list of masks, each mask is a list of polys,
each poly is a list of numbers
gt_poly_lens(list): a list of int, indicating the size of each poly
"""
if flip: if flip:
gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w)
num_polys_per_mask = np.array( num_polys_per_mask = np.array(
...@@ -108,6 +105,28 @@ class PolyMaskTransform(object): ...@@ -108,6 +105,28 @@ class PolyMaskTransform(object):
return gt_mask_polys, gt_poly_lens, num_polys_per_mask return gt_mask_polys, gt_poly_lens, num_polys_per_mask
class MaskTransform(object):
    """Preprocess instance masks.

    1. rescale each mask to the expected scale (nearest-neighbor)
    2. flip the masks horizontally (if needed)
    3. pad the masks to ``pad_shape`` and stack into one array
    """

    def __call__(self, masks, pad_shape, scale_factor, flip=False):
        rescaled = []
        for mask in masks:
            rescaled.append(
                mmcv.imrescale(mask, scale_factor, interpolation='nearest'))
        if flip:
            # horizontal flip along the width axis
            rescaled = [m[:, ::-1] for m in rescaled]
        stacked = np.stack(
            [mmcv.impad(m, pad_shape[:2], pad_val=0) for m in rescaled],
            axis=0)
        return stacked
class Numpy2Tensor(object): class Numpy2Tensor(object):
def __init__(self): def __init__(self):
......
from .data_container import DataContainer from .data_container import DataContainer
from .misc import * from .misc import to_tensor, random_scale, show_ann
__all__ = ['DataContainer', 'to_tensor', 'random_scale', 'show_ann']
from .detectors import * from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN
from .builder import * from .builder import (build_neck, build_rpn_head, build_roi_extractor,
build_bbox_head, build_mask_head, build_detector)
__all__ = [
'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone',
'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head',
'build_mask_head', 'build_detector'
]
from .resnet import resnet from .resnet import resnet
__all__ = ['resnet']
...@@ -43,17 +43,21 @@ class ConvFCRoIHead(BBoxHead): ...@@ -43,17 +43,21 @@ class ConvFCRoIHead(BBoxHead):
self.fc_out_channels = fc_out_channels self.fc_out_channels = fc_out_channels
# add shared convs and fcs # add shared convs and fcs
self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch( self.shared_convs, self.shared_fcs, last_layer_dim = \
self.num_shared_convs, self.num_shared_fcs, self.in_channels, True) self._add_conv_fc_branch(
self.num_shared_convs, self.num_shared_fcs, self.in_channels,
True)
self.shared_out_channels = last_layer_dim self.shared_out_channels = last_layer_dim
# add cls specific branch # add cls specific branch
self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch( self.cls_convs, self.cls_fcs, self.cls_last_dim = \
self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) self._add_conv_fc_branch(
self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)
# add reg specific branch # add reg specific branch
self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch( self.reg_convs, self.reg_fcs, self.reg_last_dim = \
self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) self._add_conv_fc_branch(
self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)
if self.num_shared_fcs == 0 and not self.with_avg_pool: if self.num_shared_fcs == 0 and not self.with_avg_pool:
if self.num_cls_fcs == 0: if self.num_cls_fcs == 0:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment