Unverified commit 7d343fd2, authored by Kai Chen, committed by GitHub

Merge pull request #8 from hellock/dev

API cleaning and code refactoring (WIP)
Parents: 0e0b9246 630687f4
@@ -6,32 +6,27 @@ import time
 import mmcv
 import numpy as np
 import torch
-from mmcv.torchpack import Hook
-from mmdet.datasets import collate
-from mmdet.nn.parallel import scatter
+from mmcv.runner import Hook, obj_from_dict
+from mmcv.parallel import scatter, collate
 from pycocotools.cocoeval import COCOeval
+from torch.utils.data import Dataset

-from ..eval import eval_recalls
+from .coco_utils import results2json, fast_eval_recall
+from mmdet import datasets

-__all__ = [
-    'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook',
-    'CocoDistEvalmAPHook'
-]
-
-
-class EmptyCacheHook(Hook):
-
-    def before_epoch(self, runner):
-        torch.cuda.empty_cache()
-
-    def after_epoch(self, runner):
-        torch.cuda.empty_cache()


 class DistEvalHook(Hook):

     def __init__(self, dataset, interval=1):
-        self.dataset = dataset
+        if isinstance(dataset, Dataset):
+            self.dataset = dataset
+        elif isinstance(dataset, dict):
+            self.dataset = obj_from_dict(dataset, datasets,
+                                         {'test_mode': True})
+        else:
+            raise TypeError(
+                'dataset must be a Dataset object or a dict, not {}'.format(
+                    type(dataset)))
         self.interval = interval
         self.lock_dir = None
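
# A hedged sketch of what the new dict-based construction enables: the hook
# can now be built straight from a config entry via obj_from_dict. The
# dataset keys below are illustrative, not the full CocoDataset signature.
eval_hook = DistEvalHook(
    dict(type='CocoDataset',
         ann_file='annotations/instances_val2017.json',
         img_prefix='val2017/'),
    interval=1)
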
@@ -68,17 +63,14 @@ class DistEvalHook(Hook):
         prog_bar = mmcv.ProgressBar(len(self.dataset))
         for idx in range(runner.rank, len(self.dataset), runner.world_size):
             data = self.dataset[idx]
-            device_id = torch.cuda.current_device()
-            imgs_data = tuple(
-                scatter(collate([data], samples_per_gpu=1), [device_id])[0])
+            data_gpu = scatter(
+                collate([data], samples_per_gpu=1),
+                [torch.cuda.current_device()])[0]

             # compute output
             with torch.no_grad():
                 result = runner.model(
-                    *imgs_data,
-                    return_loss=False,
-                    return_bboxes=True,
-                    rescale=True)
+                    **data_gpu, return_loss=False, rescale=True)

             results[idx] = result

             batch_size = runner.world_size
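
# For reference, the collate-then-scatter pattern used above as a standalone
# sketch (single item, single GPU; `dataset` stands for any test-mode
# dataset instance).
import torch
from mmcv.parallel import collate, scatter

batch = collate([dataset[0]], samples_per_gpu=1)            # batch of size 1
batch_gpu = scatter(batch, [torch.cuda.current_device()])[0]
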
@@ -106,129 +98,34 @@ class DistEvalHook(Hook):
         raise NotImplementedError


-class CocoEvalMixin(object):
-
-    def _xyxy2xywh(self, bbox):
-        _bbox = bbox.tolist()
-        return [
-            _bbox[0],
-            _bbox[1],
-            _bbox[2] - _bbox[0] + 1,
-            _bbox[3] - _bbox[1] + 1,
-        ]
-
-    def det2json(self, dataset, results):
-        json_results = []
-        for idx in range(len(dataset)):
-            img_id = dataset.img_ids[idx]
-            result = results[idx]
-            for label in range(len(result)):
-                bboxes = result[label]
-                for i in range(bboxes.shape[0]):
-                    data = dict()
-                    data['image_id'] = img_id
-                    data['bbox'] = self._xyxy2xywh(bboxes[i])
-                    data['score'] = float(bboxes[i][4])
-                    data['category_id'] = dataset.cat_ids[label]
-                    json_results.append(data)
-        return json_results
-
-    def segm2json(self, dataset, results):
-        json_results = []
-        for idx in range(len(dataset)):
-            img_id = dataset.img_ids[idx]
-            det, seg = results[idx]
-            for label in range(len(det)):
-                bboxes = det[label]
-                segms = seg[label]
-                for i in range(bboxes.shape[0]):
-                    data = dict()
-                    data['image_id'] = img_id
-                    data['bbox'] = self._xyxy2xywh(bboxes[i])
-                    data['score'] = float(bboxes[i][4])
-                    data['category_id'] = dataset.cat_ids[label]
-                    segms[i]['counts'] = segms[i]['counts'].decode()
-                    data['segmentation'] = segms[i]
-                    json_results.append(data)
-        return json_results
-
-    def proposal2json(self, dataset, results):
-        json_results = []
-        for idx in range(len(dataset)):
-            img_id = dataset.img_ids[idx]
-            bboxes = results[idx]
-            for i in range(bboxes.shape[0]):
-                data = dict()
-                data['image_id'] = img_id
-                data['bbox'] = self._xyxy2xywh(bboxes[i])
-                data['score'] = float(bboxes[i][4])
-                data['category_id'] = 1
-                json_results.append(data)
-        return json_results
-
-    def results2json(self, dataset, results, out_file):
-        if isinstance(results[0], list):
-            json_results = self.det2json(dataset, results)
-        elif isinstance(results[0], tuple):
-            json_results = self.segm2json(dataset, results)
-        elif isinstance(results[0], np.ndarray):
-            json_results = self.proposal2json(dataset, results)
-        else:
-            raise TypeError('invalid type of results')
-        mmcv.dump(json_results, out_file, file_format='json')
-
-
-class DistEvalRecallHook(DistEvalHook):
+class CocoDistEvalRecallHook(DistEvalHook):

     def __init__(self,
                  dataset,
                  proposal_nums=(100, 300, 1000),
                  iou_thrs=np.arange(0.5, 0.96, 0.05)):
-        super(DistEvalRecallHook, self).__init__(dataset)
+        super(CocoDistEvalRecallHook, self).__init__(dataset)
         self.proposal_nums = np.array(proposal_nums, dtype=np.int32)
         self.iou_thrs = np.array(iou_thrs, dtype=np.float32)
     def evaluate(self, runner, results):
-        # official coco evaluation is too slow, here we use our own
-        # implementation, which may get slightly different results
-        gt_bboxes = []
-        for i in range(len(self.dataset)):
-            img_id = self.dataset.img_ids[i]
-            ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id)
-            ann_info = self.dataset.coco.loadAnns(ann_ids)
-            if len(ann_info) == 0:
-                gt_bboxes.append(np.zeros((0, 4)))
-                continue
-            bboxes = []
-            for ann in ann_info:
-                if ann.get('ignore', False) or ann['iscrowd']:
-                    continue
-                x1, y1, w, h = ann['bbox']
-                bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
-            bboxes = np.array(bboxes, dtype=np.float32)
-            if bboxes.shape[0] == 0:
-                bboxes = np.zeros((0, 4))
-            gt_bboxes.append(bboxes)
-        recalls = eval_recalls(
-            gt_bboxes,
-            results,
-            self.proposal_nums,
-            self.iou_thrs,
-            print_summary=False)
-        ar = recalls.mean(axis=1)
+        # the official coco evaluation is too slow, here we use our own
+        # implementation instead, which may get slightly different results
+        ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums,
+                              self.iou_thrs)
         for i, num in enumerate(self.proposal_nums):
             runner.log_buffer.output['AR@{}'.format(num)] = ar[i]
         runner.log_buffer.ready = True
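
# A sketch of the resulting log output (assumes fast_eval_recall returns one
# average-recall value per entry of proposal_nums, as used above).
import numpy as np

ar = fast_eval_recall(results, dataset.coco,
                      np.array([100, 300, 1000], dtype=np.int32),
                      np.arange(0.5, 0.96, 0.05, dtype=np.float32))
for num, val in zip((100, 300, 1000), ar):
    print('AR@{}: {:.4f}'.format(num, val))
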
-class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin):
+class CocoDistEvalmAPHook(DistEvalHook):

     def evaluate(self, runner, results):
         tmp_file = osp.join(runner.work_dir, 'temp_0.json')
-        self.results2json(self.dataset, results, tmp_file)
+        results2json(self.dataset, results, tmp_file)

-        res_types = ['bbox', 'segm'] if runner.model.with_mask else ['bbox']
+        res_types = ['bbox',
+                     'segm'] if runner.model.module.with_mask else ['bbox']
         cocoGt = self.dataset.coco
         cocoDt = cocoGt.loadRes(tmp_file)
         imgIds = cocoGt.getImgIds()
...
@@ -9,9 +9,9 @@ def average_precision(recalls, precisions, mode='area'):
     """Calculate average precision (for single or multiple scales).

     Args:
-        recalls(ndarray): shape (num_scales, num_dets) or (num_dets, )
-        precisions(ndarray): shape (num_scales, num_dets) or (num_dets, )
-        mode(str): 'area' or '11points', 'area' means calculating the area
+        recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )
+        precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )
+        mode (str): 'area' or '11points', 'area' means calculating the area
             under precision-recall curve, '11points' means calculating
             the average precision of recalls at [0, 0.1, ..., 1]
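
# A toy illustration of the 'area' mode (a sketch, not the library code):
# AP is the area under the precision-recall curve, which for sorted discrete
# points reduces to a sum of rectangles. Real implementations first take the
# running maximum of precision (the envelope), omitted here.
import numpy as np

recalls = np.array([0.2, 0.5, 0.9])
precisions = np.array([1.0, 0.8, 0.6])
ap = recalls[0] * precisions[0] + np.sum(
    (recalls[1:] - recalls[:-1]) * precisions[1:])
print(ap)  # 0.2*1.0 + 0.3*0.8 + 0.4*0.6 = 0.68
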
@@ -60,11 +60,11 @@ def tpfp_imagenet(det_bboxes,
     """Check if detected bboxes are true positive or false positive.

     Args:
-        det_bbox(ndarray): the detected bbox
-        gt_bboxes(ndarray): ground truth bboxes of this image
-        gt_ignore(ndarray): indicate if gts are ignored for evaluation or not
-        default_iou_thr(float): the iou thresholds for medium and large bboxes
-        area_ranges(list or None): gt bbox area ranges
+        det_bbox (ndarray): the detected bbox
+        gt_bboxes (ndarray): ground truth bboxes of this image
+        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
+        default_iou_thr (float): the iou thresholds for medium and large bboxes
+        area_ranges (list or None): gt bbox area ranges

     Returns:
         tuple: two arrays (tp, fp) whose elements are 0 and 1
@@ -115,10 +115,10 @@ def tpfp_imagenet(det_bboxes,
                     max_iou = ious[i, j]
                     matched_gt = j
             # there are 4 cases for a det bbox:
-            # 1. this det bbox matches a gt, tp = 1, fp = 0
-            # 2. this det bbox matches an ignored gt, tp = 0, fp = 0
-            # 3. this det bbox matches no gt and within area range, tp = 0, fp = 1
-            # 4. this det bbox matches no gt but is beyond area range, tp = 0, fp = 0
+            # 1. it matches a gt, tp = 1, fp = 0
+            # 2. it matches an ignored gt, tp = 0, fp = 0
+            # 3. it matches no gt and within area range, tp = 0, fp = 1
+            # 4. it matches no gt but is beyond area range, tp = 0, fp = 0
             if matched_gt >= 0:
                 gt_covered[matched_gt] = 1
                 if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
@@ -137,10 +137,10 @@ def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None):
     """Check if detected bboxes are true positive or false positive.

     Args:
-        det_bbox(ndarray): the detected bbox
-        gt_bboxes(ndarray): ground truth bboxes of this image
-        gt_ignore(ndarray): indicate if gts are ignored for evaluation or not
-        iou_thr(float): the iou thresholds
+        det_bbox (ndarray): the detected bbox
+        gt_bboxes (ndarray): ground truth bboxes of this image
+        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
+        iou_thr (float): the iou thresholds

     Returns:
         tuple: (tp, fp), two arrays whose elements are 0 and 1
@@ -227,15 +227,16 @@ def eval_map(det_results,
     """Evaluate mAP of a dataset.

     Args:
-        det_results(list): a list of list, [[cls1_det, cls2_det, ...], ...]
-        gt_bboxes(list): ground truth bboxes of each image, a list of K*4 array
-        gt_labels(list): ground truth labels of each image, a list of K array
-        gt_ignore(list): gt ignore indicators of each image, a list of K array
-        scale_ranges(list, optional): [(min1, max1), (min2, max2), ...]
-        iou_thr(float): IoU threshold
-        dataset(None or str): dataset name, there are minor differences in
+        det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...]
+        gt_bboxes (list): ground truth bboxes of each image, a list of K*4
+            array.
+        gt_labels (list): ground truth labels of each image, a list of K array
+        gt_ignore (list): gt ignore indicators of each image, a list of K array
+        scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
+        iou_thr (float): IoU threshold
+        dataset (None or str): dataset name, there are minor differences in
             metrics for different datasets, e.g. "voc07", "imagenet_det", etc.
-        print_summary(bool): whether to print the mAP summary
+        print_summary (bool): whether to print the mAP summary

     Returns:
         tuple: (mAP, [dict, dict, ...])
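
# A minimal call sketch with the documented shapes (toy one-image, one-class
# data; argument order follows the docstring above).
import numpy as np

det_results = [[np.array([[10, 10, 50, 50, 0.9]], dtype=np.float32)]]
gt_bboxes = [np.array([[12, 12, 48, 48]], dtype=np.float32)]
gt_labels = [np.array([1], dtype=np.int64)]

mean_ap, eval_results = eval_map(det_results, gt_bboxes, gt_labels,
                                 gt_ignore=None, iou_thr=0.5,
                                 print_summary=False)
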
@@ -265,7 +266,8 @@ def eval_map(det_results,
                 area_ranges) for j in range(len(cls_dets))
         ]
         tp, fp = tuple(zip(*tpfp))
-        # calculate gt number of each scale, gts ignored or beyond scale are not counted
+        # calculate gt number of each scale, gts ignored or beyond scale
+        # are not counted
         num_gts = np.zeros(num_scales, dtype=int)
         for j, bbox in enumerate(cls_gts):
             if area_ranges is None:
...
from .losses import (weighted_nll_loss, weighted_cross_entropy,
weighted_binary_cross_entropy, sigmoid_focal_loss,
weighted_sigmoid_focal_loss, mask_cross_entropy,
smooth_l1_loss, weighted_smoothl1, accuracy)
__all__ = [
'weighted_nll_loss', 'weighted_cross_entropy',
'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss',
'weighted_smoothl1', 'accuracy'
]
-# TODO merge naive and weighted loss to one function.
+# TODO merge naive and weighted loss.
 import torch
 import torch.nn.functional as F

-from ..bbox_ops import bbox_transform_inv, bbox_overlaps


-def weighted_nll_loss(pred, label, weight, ave_factor=None):
-    if ave_factor is None:
-        ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
-    raw = F.nll_loss(pred, label, size_average=False, reduce=False)
-    return torch.sum(raw * weight)[None] / ave_factor
+def weighted_nll_loss(pred, label, weight, avg_factor=None):
+    if avg_factor is None:
+        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
+    raw = F.nll_loss(pred, label, reduction='none')
+    return torch.sum(raw * weight)[None] / avg_factor


-def weighted_cross_entropy(pred, label, weight, ave_factor=None):
-    if ave_factor is None:
-        ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
-    raw = F.cross_entropy(pred, label, size_average=False, reduce=False)
-    return torch.sum(raw * weight)[None] / ave_factor
+def weighted_cross_entropy(pred, label, weight, avg_factor=None):
+    if avg_factor is None:
+        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
+    raw = F.cross_entropy(pred, label, reduction='none')
+    return torch.sum(raw * weight)[None] / avg_factor


-def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None):
-    if ave_factor is None:
-        ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
+def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
+    if avg_factor is None:
+        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
     return F.binary_cross_entropy_with_logits(
         pred, label.float(), weight.float(),
-        size_average=False)[None] / ave_factor
+        reduction='sum')[None] / avg_factor
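
# A sketch of the avg_factor convention shared by these losses: the weighted
# sum is divided by the number of positive weights unless avg_factor is given.
import torch

pred = torch.randn(4, 3)                 # logits for 4 samples, 3 classes
label = torch.tensor([0, 1, 2, 1])
weight = torch.tensor([1., 1., 0., 0.])  # only the first 2 samples count

loss = weighted_cross_entropy(pred, label, weight)  # sum of 2 terms / 2
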
 def sigmoid_focal_loss(pred,
@@ -32,13 +30,13 @@ def sigmoid_focal_loss(pred,
                        weight,
                        gamma=2.0,
                        alpha=0.25,
-                       size_average=True):
+                       reduction='elementwise_mean'):
     pred_sigmoid = pred.sigmoid()
     pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
     weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
     weight = weight * pt.pow(gamma)
     return F.binary_cross_entropy_with_logits(
-        pred, target, weight, size_average=size_average)
+        pred, target, weight, reduction=reduction)


 def weighted_sigmoid_focal_loss(pred,
@@ -46,13 +44,13 @@ def weighted_sigmoid_focal_loss(pred,
                                 weight,
                                 gamma=2.0,
                                 alpha=0.25,
-                                ave_factor=None,
+                                avg_factor=None,
                                 num_classes=80):
-    if ave_factor is None:
-        ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
+    if avg_factor is None:
+        avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
     return sigmoid_focal_loss(
         pred, target, weight, gamma=gamma, alpha=alpha,
-        size_average=False)[None] / ave_factor
+        reduction='sum')[None] / avg_factor
 def mask_cross_entropy(pred, target, label):
@@ -60,37 +58,30 @@ def mask_cross_entropy(pred, target, label):
     inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
     pred_slice = pred[inds, label].squeeze(1)
     return F.binary_cross_entropy_with_logits(
-        pred_slice, target, size_average=True)[None]
+        pred_slice, target, reduction='elementwise_mean')[None]


-def weighted_mask_cross_entropy(pred, target, weight, label):
-    num_rois = pred.size()[0]
-    num_samples = torch.sum(weight > 0).float().item() + 1e-6
-    assert num_samples >= 1
-    inds = torch.arange(0, num_rois).long().cuda()
-    pred_slice = pred[inds, label].squeeze(1)
-    return F.binary_cross_entropy_with_logits(
-        pred_slice, target, weight, size_average=False)[None] / num_samples


-def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True):
+def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'):
     assert beta > 0
     assert pred.size() == target.size() and target.numel() > 0
     diff = torch.abs(pred - target)
     loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                        diff - 0.5 * beta)
-    if size_average:
-        loss /= pred.numel()
-    if reduce:
-        loss = loss.sum()
-    return loss
+    reduction = F._Reduction.get_enum(reduction)
+    # none: 0, elementwise_mean: 1, sum: 2
+    if reduction == 0:
+        return loss
+    elif reduction == 1:
+        return loss.sum() / pred.numel()
+    elif reduction == 2:
+        return loss.sum()


-def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None):
-    if ave_factor is None:
-        ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
-    loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False)
-    return torch.sum(loss * weight)[None] / ave_factor
+def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
+    if avg_factor is None:
+        avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
+    loss = smooth_l1_loss(pred, target, beta, reduction='none')
+    return torch.sum(loss * weight)[None] / avg_factor
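
# A quick sanity check of the smooth L1 definition above
# (elementwise: 0.5 * d^2 / beta for d < beta, else d - 0.5 * beta).
import torch

pred = torch.tensor([0.0, 0.0])
target = torch.tensor([0.5, 2.0])
per_elem = smooth_l1_loss(pred, target, beta=1.0, reduction='none')
print(per_elem)  # tensor([0.1250, 1.5000])
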
 def accuracy(pred, target, topk=1):
...
from .losses import (
weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,
sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy,
weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy)
__all__ = [
'weighted_nll_loss', 'weighted_cross_entropy',
'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
'weighted_sigmoid_focal_loss', 'mask_cross_entropy',
'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1',
'accuracy'
]
from .utils import split_combined_polys
from .mask_target import mask_target
__all__ = ['split_combined_polys', 'mask_target']
 import torch
 import numpy as np
+import mmcv

-from .segms import polys_to_mask_wrt_box


-def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list,
-                img_meta, cfg):
+def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
+                cfg):
     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
-    img_metas = [img_meta for _ in range(len(pos_proposals_list))]
     mask_targets = map(mask_target_single, pos_proposals_list,
-                       pos_assigned_gt_inds_list, gt_polys_list, img_metas,
-                       cfg_list)
-    mask_targets = torch.cat(tuple(mask_targets), dim=0)
+                       pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
+    mask_targets = torch.cat(list(mask_targets))
     return mask_targets


-def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys,
-                       img_meta, cfg):
+def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
     mask_size = cfg.mask_size
     num_pos = pos_proposals.size(0)
-    mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size))
+    mask_targets = []
     if num_pos > 0:
-        pos_proposals = pos_proposals.cpu().numpy()
+        proposals_np = pos_proposals.cpu().numpy()
         pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
-        scale_factor = img_meta['scale_factor'][0].cpu().numpy()
         for i in range(num_pos):
-            bbox = pos_proposals[i, :] / scale_factor
-            polys = gt_polys[pos_assigned_gt_inds[i]]
-            mask = polys_to_mask_wrt_box(polys, bbox, mask_size)
-            mask = np.array(mask > 0, dtype=np.float32)
-            mask_targets[i, ...] = torch.from_numpy(mask).to(
-                mask_targets.device)
+            gt_mask = gt_masks[pos_assigned_gt_inds[i]]
+            bbox = proposals_np[i, :].astype(np.int32)
+            x1, y1, x2, y2 = bbox
+            w = np.maximum(x2 - x1 + 1, 1)
+            h = np.maximum(y2 - y1 + 1, 1)
+            # mask is uint8 both before and after resizing
+            target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
+                                   (mask_size, mask_size))
+            mask_targets.append(target)
+        mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
+            pos_proposals.device)
+    else:
+        mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
     return mask_targets
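
# The new target computation is just crop-then-resize; the same idea as a
# standalone sketch (toy mask and box; mmcv.imresize takes (width, height)).
import numpy as np
import mmcv

gt_mask = np.zeros((100, 100), dtype=np.uint8)
gt_mask[30:70, 30:70] = 1                  # a square object
x1, y1, x2, y2 = 25, 25, 74, 74            # an int-coordinate proposal
crop = gt_mask[y1:y2 + 1, x1:x2 + 1]
target = mmcv.imresize(crop, (28, 28))     # a 28x28 uint8 mask target
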
-import cvbase as cvb
-import numpy as np
-import pycocotools.mask as mask_utils
 import mmcv


-def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
+def split_combined_polys(polys, poly_lens, polys_per_mask):
     """Split the combined 1-D polys into masks.

     A mask is represented as a list of polys, and a poly is represented as
@@ -13,9 +9,9 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
     tensor. Here we need to split the tensor into original representations.

     Args:
-        gt_polys (list): a list (length = image num) of 1-D tensors
-        gt_poly_lens (list): a list (length = image num) of poly length
-        num_polys_per_mask (list): a list (length = image num) of poly number
+        polys (list): a list (length = image num) of 1-D tensors
+        poly_lens (list): a list (length = image num) of poly length
+        polys_per_mask (list): a list (length = image num) of poly number
             of each mask

     Returns:
@@ -23,13 +19,12 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
         list (length = poly num) of numpy array
     """
     mask_polys_list = []
-    for img_id in range(len(gt_polys)):
-        gt_polys_single = gt_polys[img_id].cpu().numpy()
-        gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist()
-        num_polys_per_mask_single = num_polys_per_mask[
-            img_id].cpu().numpy().tolist()
+    for img_id in range(len(polys)):
+        polys_single = polys[img_id]
+        polys_lens_single = poly_lens[img_id].tolist()
+        polys_per_mask_single = polys_per_mask[img_id].tolist()

-        split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single)
-        mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single)
+        split_polys = mmcv.slice_list(polys_single, polys_lens_single)
+        mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
         mask_polys_list.append(mask_polys)
     return mask_polys_list
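
# mmcv.slice_list splits a flat list by sub-list lengths; a small sketch of
# the two-level split performed above (values illustrative).
import mmcv

flat = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]   # all polygon coords of one image
polys = mmcv.slice_list(flat, [4, 2])   # -> two polys of 4 and 2 floats
masks = mmcv.slice_list(polys, [2])     # -> one mask made of both polys
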
from .segms import (flip_segms, polys_to_mask, mask_to_bbox,
polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting,
rle_mask_nms, rle_masks_to_boxes)
from .utils import split_combined_gt_polys
from .mask_target import mask_target
__all__ = [
'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box',
'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes',
'split_combined_gt_polys', 'mask_target'
]
# This file is copied from Detectron.
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Functions for interacting with segmentation masks in the COCO format.
The following terms are used in this module
mask: a binary mask encoded as a 2D numpy array
segm: a segmentation mask in one of the two COCO formats (polygon or RLE)
polygon: COCO's polygon format
RLE: COCO's run length encoding format
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import pycocotools.mask as mask_util
def flip_segms(segms, height, width):
"""Left/right flip each mask in a list of masks."""
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
# Magic RLE format handling painfully discovered by looking at the
# COCO API showAnns function.
rle = mask_util.frPyObjects([rle], height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1, :]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if type(segm) == list:
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
assert type(segm) == dict
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
def polys_to_mask(polygons, height, width):
"""Convert from the COCO polygon segmentation format to a binary mask
encoded as a 2D array of data type numpy.float32. The polygon segmentation
is understood to be enclosed inside a height x width image. The resulting
mask is therefore of shape (height, width).
"""
rle = mask_util.frPyObjects(polygons, height, width)
mask = np.array(mask_util.decode(rle), dtype=np.float32)
# Flatten in case polygons was a list
mask = np.sum(mask, axis=2)
mask = np.array(mask > 0, dtype=np.float32)
return mask
def mask_to_bbox(mask):
"""Compute the tight bounding box of a binary mask."""
xs = np.where(np.sum(mask, axis=0) > 0)[0]
ys = np.where(np.sum(mask, axis=1) > 0)[0]
if len(xs) == 0 or len(ys) == 0:
return None
x0 = xs[0]
x1 = xs[-1]
y0 = ys[0]
y1 = ys[-1]
return np.array((x0, y0, x1, y1), dtype=np.float32)
def polys_to_mask_wrt_box(polygons, box, M):
"""Convert from the COCO polygon segmentation format to a binary mask
encoded as a 2D array of data type numpy.float32. The polygon segmentation
is understood to be enclosed in the given box and rasterized to an M x M
mask. The resulting mask is therefore of shape (M, M).
"""
w = box[2] - box[0]
h = box[3] - box[1]
w = np.maximum(w, 1)
h = np.maximum(h, 1)
polygons_norm = []
for poly in polygons:
p = np.array(poly, dtype=np.float32)
p[0::2] = (p[0::2] - box[0]) * M / w
p[1::2] = (p[1::2] - box[1]) * M / h
polygons_norm.append(p)
rle = mask_util.frPyObjects(polygons_norm, M, M)
mask = np.array(mask_util.decode(rle), dtype=np.float32)
# Flatten in case polygons was a list
mask = np.sum(mask, axis=2)
mask = np.array(mask > 0, dtype=np.float32)
return mask
def polys_to_boxes(polys):
"""Convert a list of polygons into an array of tight bounding boxes."""
boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
for i in range(len(polys)):
poly = polys[i]
x0 = min(min(p[::2]) for p in poly)
x1 = max(max(p[::2]) for p in poly)
y0 = min(min(p[1::2]) for p in poly)
y1 = max(max(p[1::2]) for p in poly)
boxes_from_polys[i, :] = [x0, y0, x1, y1]
return boxes_from_polys
def rle_mask_voting(top_masks,
all_masks,
all_dets,
iou_thresh,
binarize_thresh,
method='AVG'):
"""Returns new masks (in correspondence with `top_masks`) by combining
multiple overlapping masks coming from the pool of `all_masks`. Two methods
for combining masks are supported: 'AVG' uses a weighted average of
overlapping mask pixels; 'UNION' takes the union of all mask pixels.
"""
if len(top_masks) == 0:
return
all_not_crowd = [False] * len(all_masks)
top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
decoded_all_masks = [
np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
]
decoded_top_masks = [
np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
]
all_boxes = all_dets[:, :4].astype(np.int32)
all_scores = all_dets[:, 4]
# Fill box support with weights
mask_shape = decoded_all_masks[0].shape
mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
for k in range(len(all_masks)):
ref_box = all_boxes[k]
x_0 = max(ref_box[0], 0)
x_1 = min(ref_box[2] + 1, mask_shape[1])
y_0 = max(ref_box[1], 0)
y_1 = min(ref_box[3] + 1, mask_shape[0])
mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
mask_weights = np.maximum(mask_weights, 1e-5)
top_segms_out = []
for k in range(len(top_masks)):
# Corner case of empty mask
if decoded_top_masks[k].sum() == 0:
top_segms_out.append(top_masks[k])
continue
inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
# Only matches itself
if len(inds_to_vote) == 1:
top_segms_out.append(top_masks[k])
continue
masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
if method == 'AVG':
ws = mask_weights[inds_to_vote]
soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
elif method == 'UNION':
# Any pixel that's on joins the mask
soft_mask = np.sum(masks_to_vote, axis=0)
mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
else:
raise NotImplementedError('Method {} is unknown'.format(method))
rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
top_segms_out.append(rle)
return top_segms_out
def rle_mask_nms(masks, dets, thresh, mode='IOU'):
"""Performs greedy non-maximum suppression based on an overlap measurement
between masks. The type of measurement is determined by `mode` and can be
either 'IOU' (standard intersection over union) or 'IOMA' (intersection over
mininum area).
"""
if len(masks) == 0:
return []
if len(masks) == 1:
return [0]
if mode == 'IOU':
# Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))
all_not_crowds = [False] * len(masks)
ious = mask_util.iou(masks, masks, all_not_crowds)
elif mode == 'IOMA':
# Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2))
all_crowds = [True] * len(masks)
# ious[m1, m2] = area(intersect(m1, m2)) / area(m2)
ious = mask_util.iou(masks, masks, all_crowds)
# ... = max(area(intersect(m1, m2)) / area(m2),
# area(intersect(m2, m1)) / area(m1))
ious = np.maximum(ious, ious.transpose())
elif mode == 'CONTAINMENT':
# Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2)
# Which measures how much m2 is contained inside m1
all_crowds = [True] * len(masks)
ious = mask_util.iou(masks, masks, all_crowds)
else:
raise NotImplementedError('Mode {} is unknown'.format(mode))
scores = dets[:, 4]
order = np.argsort(-scores)
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
ovr = ious[i, order[1:]]
inds_to_keep = np.where(ovr <= thresh)[0]
order = order[inds_to_keep + 1]
return keep
def rle_masks_to_boxes(masks):
"""Computes the bounding box of each mask in a list of RLE encoded masks."""
if len(masks) == 0:
return []
decoded_masks = [
np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
]
def get_bounds(flat_mask):
inds = np.where(flat_mask > 0)[0]
return inds.min(), inds.max()
boxes = np.zeros((len(decoded_masks), 4))
keep = [True] * len(decoded_masks)
for i, mask in enumerate(decoded_masks):
if mask.sum() == 0:
keep[i] = False
continue
flat_mask = mask.sum(axis=0)
x0, x1 = get_bounds(flat_mask)
flat_mask = mask.sum(axis=1)
y0, y1 = get_bounds(flat_mask)
boxes[i, :] = (x0, y0, x1, y1)
return boxes, np.where(keep)[0]
 import torch
-from mmdet.ops import nms
 import numpy as np

-from ..bbox_ops import bbox_mapping_back
+from mmdet.ops import nms
+from ..bbox import bbox_mapping_back


 def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
@@ -21,11 +21,12 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
     """
     recovered_proposals = []
     for proposals, img_info in zip(aug_proposals, img_metas):
-        shape_scale = img_info['shape_scale'][0]
-        flip = img_info['flip'][0]
+        img_shape = img_info['img_shape']
+        scale_factor = img_info['scale_factor']
+        flip = img_info['flip']
         _proposals = proposals.clone()
-        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale,
-                                              flip)
+        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
+                                              scale_factor, flip)
         recovered_proposals.append(_proposals)
     aug_proposals = torch.cat(recovered_proposals, dim=0)
     nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr,
@@ -53,9 +54,10 @@ def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
     """
     recovered_bboxes = []
     for bboxes, img_info in zip(aug_bboxes, img_metas):
-        shape_scale = img_info['shape_scale'][0]
-        flip = img_info['flip'][0]
-        bboxes = bbox_mapping_back(bboxes, shape_scale, flip)
+        img_shape = img_info[0]['img_shape']
+        scale_factor = img_info[0]['scale_factor']
+        flip = img_info[0]['flip']
+        bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
         recovered_bboxes.append(bboxes)
     bboxes = torch.stack(recovered_bboxes).mean(dim=0)
     if aug_scores is None:
@@ -73,7 +75,7 @@ def merge_aug_scores(aug_scores):
     return np.mean(aug_scores, axis=0)


-def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
+def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
     """Merge augmented mask prediction.

     Args:
@@ -85,7 +87,7 @@ def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
         tuple: (bboxes, scores)
     """
     recovered_masks = [
-        mask if not img_info['flip'][0] else mask[..., ::-1]
+        mask if not img_info[0]['flip'] else mask[..., ::-1]
         for mask, img_info in zip(aug_masks, img_metas)
     ]
     if weights is None:
...
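
# A small sketch of the flip-undo step in merge_aug_masks (numpy masks;
# [..., ::-1] reverses the horizontal axis).
import numpy as np

pred_orig = np.random.rand(28, 28).astype(np.float32)  # from the original image
pred_flip = np.random.rand(28, 28).astype(np.float32)  # from the flipped image
recovered = pred_flip[..., ::-1]                       # undo the flip
merged = np.mean([pred_orig, recovered], axis=0)       # unweighted merge
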
from .anchor_generator import *
from .anchor_target import *
from mmdet.datasets import collate
from mmdet.nn.parallel import scatter
__all__ = ['_data_func']
def _data_func(data, gpu_id):
imgs, img_metas = tuple(
scatter(collate([data], samples_per_gpu=1), [gpu_id])[0])
return dict(
img=imgs,
img_meta=img_metas,
return_loss=False,
return_bboxes=True,
rescale=True)
import numpy as np
import torch
from collections import OrderedDict
from mmdet.nn.parallel import scatter
def parse_losses(losses):
log_vars = OrderedDict()
for loss_key, loss_value in losses.items():
if isinstance(loss_value, dict):
for _key, _value in loss_value.items():
if isinstance(_value, list):
_value = sum([_loss.mean() for _loss in _value])
else:
_value = _value.mean()
log_vars[_key] = _value
elif isinstance(loss_value, list):
log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value)
else:
log_vars[loss_key] = loss_value.mean()
loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
log_vars['loss'] = loss
for _key, _value in log_vars.items():
log_vars[_key] = _value.item()
return loss, log_vars
def batch_processor(model, data, train_mode, args=None):
data = scatter(data, [torch.cuda.current_device()])[0]
losses = model(**data)
loss, log_vars = parse_losses(losses)
outputs = dict(
loss=loss / args.world_size,
log_vars=log_vars,
num_samples=len(data['img'].data))
return outputs
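
# A sketch of what parse_losses expects and returns (toy loss dict; every
# key containing 'loss' is summed into the total, others are only logged).
import torch

losses = dict(
    loss_cls=torch.tensor([0.5, 0.7]),                 # mean -> 0.6
    loss_bbox=[torch.tensor(0.2), torch.tensor(0.4)],  # sum of means -> 0.6
    acc=torch.tensor(0.9))                             # logged, not summed

total, log_vars = parse_losses(losses)
# total == 1.2; log_vars == {'loss_cls': 0.6, 'loss_bbox': 0.6,
#                            'acc': 0.9, 'loss': 1.2}
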
-from .dist_utils import *
-from .hooks import *
-from .misc import *
+from .dist_utils import init_dist, allreduce_grads, DistOptimizerHook
+from .misc import tensor2imgs, unmap, multi_apply

+__all__ = [
+    'init_dist', 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs',
+    'unmap', 'multi_apply'
+]
 import os
+from collections import OrderedDict

 import torch
 import torch.multiprocessing as mp
 import torch.distributed as dist
-from torch.nn.utils import clip_grad
-from mmcv.torchpack import Hook, OptimizerStepperHook
+from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
+                          _take_tensors)
+from mmcv.runner import OptimizerHook

-__all__ = [
-    'init_dist', 'average_gradients', 'broadcast_params',
-    'DistOptimizerStepperHook', 'DistSamplerSeedHook'
-]


-def init_dist(world_size,
-              rank,
-              backend='gloo',
-              master_ip='127.0.0.1',
-              port=29500):
+def init_dist(launcher, backend='nccl', **kwargs):
     if mp.get_start_method(allow_none=True) is None:
         mp.set_start_method('spawn')
+    if launcher == 'pytorch':
+        _init_dist_pytorch(backend, **kwargs)
+    elif launcher == 'mpi':
+        _init_dist_mpi(backend, **kwargs)
+    elif launcher == 'slurm':
+        _init_dist_slurm(backend, **kwargs)
+    else:
+        raise ValueError('Invalid launcher type: {}'.format(launcher))


+def _init_dist_pytorch(backend, **kwargs):
+    # TODO: use local_rank instead of rank % num_gpus
+    rank = int(os.environ['RANK'])
     num_gpus = torch.cuda.device_count()
     torch.cuda.set_device(rank % num_gpus)
-    os.environ['MASTER_ADDR'] = master_ip
-    os.environ['MASTER_PORT'] = str(port)
-    if backend == 'nccl':
-        dist.init_process_group(backend='nccl')
-    else:
-        dist.init_process_group(
-            backend='gloo', rank=rank, world_size=world_size)
+    dist.init_process_group(backend=backend, **kwargs)


+def _init_dist_mpi(backend, **kwargs):
+    raise NotImplementedError


+def _init_dist_slurm(backend, **kwargs):
+    raise NotImplementedError
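
# A hedged usage sketch for the launcher-style init: with the 'pytorch'
# launcher, each process spawned by torch.distributed.launch (which sets the
# RANK environment variable) simply calls:
init_dist('pytorch', backend='nccl')
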
+def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
+    if bucket_size_mb > 0:
+        bucket_size_bytes = bucket_size_mb * 1024 * 1024
+        buckets = _take_tensors(tensors, bucket_size_bytes)
+    else:
+        buckets = OrderedDict()
+        for tensor in tensors:
+            tp = tensor.type()
+            if tp not in buckets:
+                buckets[tp] = []
+            buckets[tp].append(tensor)
+        buckets = buckets.values()
+
+    for bucket in buckets:
+        flat_tensors = _flatten_dense_tensors(bucket)
+        dist.all_reduce(flat_tensors)
+        flat_tensors.div_(world_size)
+        for tensor, synced in zip(
+                bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
+            tensor.copy_(synced)


-def average_gradients(model):
-    for param in model.parameters():
-        if param.requires_grad and not (param.grad is None):
-            dist.all_reduce(param.grad.data)
+def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
+    grads = [
+        param.grad.data for param in model.parameters()
+        if param.requires_grad and param.grad is not None
+    ]
+    world_size = dist.get_world_size()
+    if coalesce:
+        _allreduce_coalesced(grads, world_size, bucket_size_mb)
+    else:
+        for tensor in grads:
+            dist.all_reduce(tensor.div_(world_size))


-def broadcast_params(model):
-    for p in model.state_dict().values():
-        dist.broadcast(p, 0)
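
# Where allreduce_grads fits in a manual training step, as a sketch
# (model, data_batch and optimizer are placeholders; parse_losses is the
# helper defined in the train utils above):
losses = model(**data_batch)
loss, log_vars = parse_losses(losses)
loss.backward()
allreduce_grads(model, coalesce=True, bucket_size_mb=32)  # sync gradients
optimizer.step()
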
-class DistOptimizerStepperHook(OptimizerStepperHook):
+class DistOptimizerHook(OptimizerHook):
+
+    def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
+        self.grad_clip = grad_clip
+        self.coalesce = coalesce
+        self.bucket_size_mb = bucket_size_mb

     def after_train_iter(self, runner):
         runner.optimizer.zero_grad()
         runner.outputs['loss'].backward()
-        average_gradients(runner.model)
-        if self.grad_clip:
-            clip_grad.clip_grad_norm_(
-                filter(lambda p: p.requires_grad, runner.model.parameters()),
-                max_norm=self.max_norm,
-                norm_type=self.norm_type)
+        allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
+        if self.grad_clip is not None:
+            self.clip_grads(runner.model.parameters())
         runner.optimizer.step()


-class DistSamplerSeedHook(Hook):
-
-    def before_epoch(self, runner):
-        runner.data_loader.sampler.set_epoch(runner.epoch)
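
# A sketch of registering the hook with an mmcv runner (values illustrative;
# the grad_clip dict is assumed to be forwarded to OptimizerHook.clip_grads).
optimizer_config = DistOptimizerHook(grad_clip=dict(max_norm=35, norm_type=2))
runner.register_training_hooks(lr_config, optimizer_config)
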
-import subprocess
+from functools import partial

 import mmcv
 import numpy as np
-import torch
+from six.moves import map, zip

-__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json']


-def tensor2imgs(tensor,
-                color_order='RGB',
-                color_mean=(0.485, 0.456, 0.406),
-                color_std=(0.229, 0.224, 0.225)):
-    assert color_order in ['RGB', 'BGR']
-    img_per_gpu = tensor.size(0)
-    color_mean = np.array(color_mean, dtype=np.float32)
-    color_std = np.array(color_std, dtype=np.float32)
+def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
+    num_imgs = tensor.size(0)
+    mean = np.array(mean, dtype=np.float32)
+    std = np.array(std, dtype=np.float32)
     imgs = []
-    for img_id in range(img_per_gpu):
+    for img_id in range(num_imgs):
         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
-        if color_order == 'RGB':
-            img = mmcv.rgb2bgr(img)
-        img = img * color_std + color_mean
+        img = mmcv.imdenormalize(
+            img, mean, std, to_bgr=to_rgb).astype(np.uint8)
         imgs.append(np.ascontiguousarray(img))
     return imgs


-def unique(tensor):
-    if tensor.is_cuda:
-        u_tensor = np.unique(tensor.cpu().numpy())
-        return tensor.new_tensor(u_tensor)
-    else:
-        return torch.unique(tensor)
+def multi_apply(func, *args, **kwargs):
+    pfunc = partial(func, **kwargs) if kwargs else func
+    map_results = map(pfunc, *args)
+    return tuple(map(list, zip(*map_results)))
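
# multi_apply maps a function over per-level argument lists and transposes
# the per-call result tuples into a tuple of lists; a small sketch:
def add_and_mul(x, y, scale=1):
    return x + y, x * y * scale

sums, prods = multi_apply(add_and_mul, [1, 2], [10, 20], scale=2)
# sums == [11, 22], prods == [20, 80]
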
 def unmap(data, count, inds, fill=0):
@@ -44,75 +35,3 @@ def unmap(data, count, inds, fill=0):
     ret = data.new_full(new_size, fill)
     ret[inds, :] = data
     return ret
def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def results2json(dataset, results, out_file):
if isinstance(results[0], list):
json_results = det2json(dataset, results)
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
else:
raise TypeError('invalid type of results')
mmcv.dump(json_results, out_file)
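
# These JSON helpers move to coco_utils. The conversion they rely on maps an
# inclusive-corner xyxy box to COCO xywh; a quick worked example:
import numpy as np

bbox = np.array([10, 20, 49, 59], dtype=np.float32)
print(xyxy2xywh(bbox))  # [10.0, 20.0, 40.0, 40.0] since w = x2 - x1 + 1
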
 from .coco import CocoDataset
-from .collate import *
-from .sampler import *
-from .transforms import *
+from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
+from .utils import to_tensor, random_scale, show_ann

+__all__ = [
+    'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
+    'build_dataloader', 'to_tensor', 'random_scale', 'show_ann'
+]
@@ -2,75 +2,17 @@ import os.path as osp
 import mmcv
 import numpy as np
+from mmcv.parallel import DataContainer as DC
 from pycocotools.coco import COCO
 from torch.utils.data import Dataset

-from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform,
-                         Numpy2Tensor)
-from .utils import show_ann, random_scale
-from .utils import DataContainer as DC
+from .transforms import (ImageTransform, BboxTransform, MaskTransform,
+                         Numpy2Tensor)
+from .utils import to_tensor, show_ann, random_scale
def parse_ann_info(ann_info, cat2label, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
cat2label (dict): The mapping from category ids to labels.
with_mask (bool): Whether to parse mask annotations.
Returns:
tuple: gt_bboxes, gt_labels and gt_mask_info
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# each mask consists of one or several polys, each poly is a list of float.
if with_mask:
gt_mask_polys = []
gt_poly_lens = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(cat2label[ann['category_id']])
if with_mask:
# Note polys are not resized
mask_polys = [
p for p in ann['segmentation'] if len(p) >= 6
] # valid polygons have >= 3 points (6 coordinates)
poly_lens = [len(p) for p in mask_polys]
gt_mask_polys.append(mask_polys)
gt_poly_lens.extend(poly_lens)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
 class CocoDataset(Dataset):

     def __init__(self,
                  ann_file,
                  img_prefix,
@@ -137,7 +79,7 @@ class CocoDataset(Dataset):
         self.img_transform = ImageTransform(
             size_divisor=self.size_divisor, **self.img_norm_cfg)
         self.bbox_transform = BboxTransform()
-        self.mask_transform = PolyMaskTransform()
+        self.mask_transform = MaskTransform()
         self.numpy2tensor = Numpy2Tensor()

     def __len__(self):
@@ -161,6 +103,70 @@ class CocoDataset(Dataset):
         ann_info = self.coco.loadAnns(ann_ids)
         return ann_info
def _parse_ann_info(self, ann_info, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, mask_polys, poly_lens.
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# Two formats are provided.
# 1. mask: a binary map of the same size of the image.
# 2. polys: each mask consists of one or several polys, each poly is a
# list of float.
if with_mask:
gt_masks = []
gt_mask_polys = []
gt_poly_lens = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
if with_mask:
gt_masks.append(self.coco.annToMask(ann))
mask_polys = [
p for p in ann['segmentation'] if len(p) >= 6
] # valid polygons have >= 3 points (6 coordinates)
poly_lens = [len(p) for p in mask_polys]
gt_mask_polys.append(mask_polys)
gt_poly_lens.extend(poly_lens)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['masks'] = gt_masks
# poly format is not used in the current implementation
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
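
# A sketch of the dict _parse_ann_info returns for an image with one
# annotated box (values illustrative; masks holds H x W uint8 binary maps
# produced by self.coco.annToMask):
import numpy as np

example_ann = dict(
    bboxes=np.array([[10., 20., 49., 59.]], dtype=np.float32),
    labels=np.array([1], dtype=np.int64),
    bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
    masks=[np.zeros((480, 640), dtype=np.uint8)],
    mask_polys=[[[10., 20., 49., 20., 49., 59.]]],
    poly_lens=[6])
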
     def _set_group_flag(self):
         """Set flag according to image aspect ratio.
@@ -199,7 +205,7 @@ class CocoDataset(Dataset):
                 idx = self._rand_another(idx)
                 continue

-        ann = parse_ann_info(ann_info, self.cat2label, self.with_mask)
+        ann = self._parse_ann_info(ann_info, self.with_mask)
         gt_bboxes = ann['bboxes']
         gt_labels = ann['labels']
         gt_bboxes_ignore = ann['bboxes_ignore']
@@ -211,7 +217,7 @@ class CocoDataset(Dataset):
         # apply transforms
         flip = True if np.random.rand() < self.flip_ratio else False
         img_scale = random_scale(self.img_scales)  # sample a scale
-        img, img_shape, scale_factor = self.img_transform(
+        img, img_shape, pad_shape, scale_factor = self.img_transform(
             img, img_scale, flip)
         if self.proposals is not None:
             proposals = self.bbox_transform(proposals, img_shape,
@@ -222,32 +228,29 @@ class CocoDataset(Dataset):
                                             scale_factor, flip)
         if self.with_mask:
-            gt_mask_polys, gt_poly_lens, num_polys_per_mask = \
-                self.mask_transform(
-                    ann['mask_polys'], ann['poly_lens'],
-                    img_info['height'], img_info['width'], flip)
+            gt_masks = self.mask_transform(ann['masks'], pad_shape,
+                                           scale_factor, flip)

-        ori_shape = (img_info['height'], img_info['width'])
+        ori_shape = (img_info['height'], img_info['width'], 3)
         img_meta = dict(
-            ori_shape=DC(ori_shape),
-            img_shape=DC(img_shape),
-            scale_factor=DC(scale_factor),
-            flip=DC(flip))
+            ori_shape=ori_shape,
+            img_shape=img_shape,
+            pad_shape=pad_shape,
+            scale_factor=scale_factor,
+            flip=flip)

         data = dict(
-            img=DC(img, stack=True),
-            img_meta=img_meta,
-            gt_bboxes=DC(gt_bboxes))
+            img=DC(to_tensor(img), stack=True),
+            img_meta=DC(img_meta, cpu_only=True),
+            gt_bboxes=DC(to_tensor(gt_bboxes)))
         if self.proposals is not None:
-            data['proposals'] = DC(proposals)
+            data['proposals'] = DC(to_tensor(proposals))
         if self.with_label:
-            data['gt_labels'] = DC(gt_labels)
+            data['gt_labels'] = DC(to_tensor(gt_labels))
         if self.with_crowd:
-            data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore)
+            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
         if self.with_mask:
-            data['gt_mask_polys'] = DC(gt_mask_polys)
-            data['gt_poly_lens'] = DC(gt_poly_lens)
-            data['num_polys_per_mask'] = DC(num_polys_per_mask)
+            data['gt_masks'] = DC(gt_masks, cpu_only=True)
         return data
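
# A sketch of how the DataContainer fields behave under mmcv.parallel.collate
# (hedged; stack=True fields are padded and stacked per GPU, cpu_only=True
# fields pass through as lists of Python objects):
from mmcv.parallel import collate

batch = collate([dataset[0], dataset[1]], samples_per_gpu=2)
# batch['img'].data[0]      -> one stacked (2, C, H, W) tensor
# batch['img_meta'].data[0] -> a list of two meta dicts, kept on CPU
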
     def prepare_test_img(self, idx):
@@ -258,37 +261,38 @@ class CocoDataset(Dataset):
                      if self.proposals is not None else None)

         def prepare_single(img, scale, flip, proposal=None):
-            _img, _img_shape, _scale_factor = self.img_transform(
+            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                 img, scale, flip)
-            img, img_shape, scale_factor = self.numpy2tensor(
-                _img, _img_shape, _scale_factor)
-            ori_shape = (img_info['height'], img_info['width'])
-            img_meta = dict(
-                ori_shape=ori_shape,
+            _img = to_tensor(_img)
+            _img_meta = dict(
+                ori_shape=(img_info['height'], img_info['width'], 3),
                 img_shape=img_shape,
+                pad_shape=pad_shape,
                 scale_factor=scale_factor,
                 flip=flip)
             if proposal is not None:
-                proposal = self.bbox_transform(proposal, _scale_factor, flip)
-                proposal = self.numpy2tensor(proposal)
-            return img, img_meta, proposal
+                _proposal = self.bbox_transform(proposal, scale_factor, flip)
+                _proposal = to_tensor(_proposal)
+            else:
+                _proposal = None
+            return _img, _img_meta, _proposal

         imgs = []
         img_metas = []
         proposals = []
         for scale in self.img_scales:
-            img, img_meta, proposal = prepare_single(img, scale, False,
-                                                     proposal)
-            imgs.append(img)
-            img_metas.append(img_meta)
-            proposals.append(proposal)
+            _img, _img_meta, _proposal = prepare_single(
+                img, scale, False, proposal)
+            imgs.append(_img)
+            img_metas.append(DC(_img_meta, cpu_only=True))
+            proposals.append(_proposal)
             if self.flip_ratio > 0:
-                img, img_meta, prop = prepare_single(img, scale, True,
-                                                     proposal)
-                imgs.append(img)
-                img_metas.append(img_meta)
-                proposals.append(prop)
+                _img, _img_meta, _proposal = prepare_single(
+                    img, scale, True, proposal)
+                imgs.append(_img)
+                img_metas.append(DC(_img_meta, cpu_only=True))
+                proposals.append(_proposal)
-        if self.proposals is None:
-            return imgs, img_metas
-        else:
-            return imgs, img_metas, proposals
+        data = dict(img=imgs, img_meta=img_metas)
+        if self.proposals is not None:
+            data['proposals'] = proposals
+        return data