Unverified commit 7d343fd2 authored by Kai Chen, committed by GitHub

Merge pull request #8 from hellock/dev

API cleaning and code refactoring (WIP)
parents 0e0b9246 630687f4
@@ -6,32 +6,27 @@ import time
import mmcv
import numpy as np
import torch
from mmcv.torchpack import Hook
from mmdet.datasets import collate
from mmdet.nn.parallel import scatter
from mmcv.runner import Hook, obj_from_dict
from mmcv.parallel import scatter, collate
from pycocotools.cocoeval import COCOeval
from torch.utils.data import Dataset
from ..eval import eval_recalls
__all__ = [
'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook',
'CocoDistEvalmAPHook'
]
class EmptyCacheHook(Hook):
def before_epoch(self, runner):
torch.cuda.empty_cache()
def after_epoch(self, runner):
torch.cuda.empty_cache()
from .coco_utils import results2json, fast_eval_recall
from mmdet import datasets
class DistEvalHook(Hook):
def __init__(self, dataset, interval=1):
self.dataset = dataset
if isinstance(dataset, Dataset):
self.dataset = dataset
elif isinstance(dataset, dict):
self.dataset = obj_from_dict(dataset, datasets,
{'test_mode': True})
else:
raise TypeError(
'dataset must be a Dataset object or a dict, not {}'.format(
type(dataset)))
self.interval = interval
self.lock_dir = None
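# Hedged sketch of the config-dict path above: mmcv's obj_from_dict pops the
# 'type' key, looks the class up in the given module, and fills in default
# args. The 'CocoDataset' arguments below are illustrative, not a verified
# config.
#
#   dataset_cfg = dict(type='CocoDataset', ann_file='ann.json', img_prefix='imgs/')
#   hook = DistEvalHook(dataset_cfg, interval=1)
#
#   # roughly equivalent to:
#   args = dataset_cfg.copy()
#   obj_type = args.pop('type')            # 'CocoDataset'
#   args.setdefault('test_mode', True)     # the injected default arg
#   dataset = getattr(datasets, obj_type)(**args)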
@@ -68,17 +63,14 @@ class DistEvalHook(Hook):
prog_bar = mmcv.ProgressBar(len(self.dataset))
for idx in range(runner.rank, len(self.dataset), runner.world_size):
data = self.dataset[idx]
device_id = torch.cuda.current_device()
imgs_data = tuple(
scatter(collate([data], samples_per_gpu=1), [device_id])[0])
data_gpu = scatter(
collate([data], samples_per_gpu=1),
[torch.cuda.current_device()])[0]
# compute output
with torch.no_grad():
result = runner.model(
*imgs_data,
return_loss=False,
return_bboxes=True,
rescale=True)
**data_gpu, return_loss=False, rescale=True)
results[idx] = result
batch_size = runner.world_size
@@ -106,129 +98,34 @@ class DistEvalHook(Hook):
raise NotImplementedError
class CocoEvalMixin(object):
def _xyxy2xywh(self, bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
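# Worked example of the inclusive-pixel (+1) convention used above: the corner
# box [x1, y1, x2, y2] = [10, 20, 50, 60] becomes COCO-style [x, y, w, h] =
# [10, 20, 41, 41], since w = 50 - 10 + 1 and h = 60 - 20 + 1.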
def det2json(self, dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = self._xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(self, dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = self._xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def proposal2json(self, dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = self._xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def results2json(self, dataset, results, out_file):
if isinstance(results[0], list):
json_results = self.det2json(dataset, results)
elif isinstance(results[0], tuple):
json_results = self.segm2json(dataset, results)
elif isinstance(results[0], np.ndarray):
json_results = self.proposal2json(dataset, results)
else:
raise TypeError('invalid type of results')
mmcv.dump(json_results, out_file, file_format='json')
class DistEvalRecallHook(DistEvalHook):
class CocoDistEvalRecallHook(DistEvalHook):
def __init__(self,
dataset,
proposal_nums=(100, 300, 1000),
iou_thrs=np.arange(0.5, 0.96, 0.05)):
super(DistEvalRecallHook, self).__init__(dataset)
super(CocoDistEvalRecallHook, self).__init__(dataset)
self.proposal_nums = np.array(proposal_nums, dtype=np.int32)
self.iou_thrs = np.array(iou_thrs, dtype=np.float32)
def evaluate(self, runner, results):
# official coco evaluation is too slow, here we use our own
# implementation, which may get slightly different results
gt_bboxes = []
for i in range(len(self.dataset)):
img_id = self.dataset.img_ids[i]
ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id)
ann_info = self.dataset.coco.loadAnns(ann_ids)
if len(ann_info) == 0:
gt_bboxes.append(np.zeros((0, 4)))
continue
bboxes = []
for ann in ann_info:
if ann.get('ignore', False) or ann['iscrowd']:
continue
x1, y1, w, h = ann['bbox']
bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
bboxes = np.array(bboxes, dtype=np.float32)
if bboxes.shape[0] == 0:
bboxes = np.zeros((0, 4))
gt_bboxes.append(bboxes)
recalls = eval_recalls(
gt_bboxes,
results,
self.proposal_nums,
self.iou_thrs,
print_summary=False)
ar = recalls.mean(axis=1)
# the official coco evaluation is too slow, here we use our own
# implementation instead, which may get slightly different results
ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums,
self.iou_thrs)
for i, num in enumerate(self.proposal_nums):
runner.log_buffer.output['AR@{}'.format(num)] = ar[i]
runner.log_buffer.ready = True
class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin):
class CocoDistEvalmAPHook(DistEvalHook):
def evaluate(self, runner, results):
tmp_file = osp.join(runner.work_dir, 'temp_0.json')
self.results2json(self.dataset, results, tmp_file)
results2json(self.dataset, results, tmp_file)
res_types = ['bbox', 'segm'] if runner.model.with_mask else ['bbox']
res_types = ['bbox',
'segm'] if runner.model.module.with_mask else ['bbox']
cocoGt = self.dataset.coco
cocoDt = cocoGt.loadRes(tmp_file)
imgIds = cocoGt.getImgIds()
@@ -9,9 +9,9 @@ def average_precision(recalls, precisions, mode='area'):
"""Calculate average precision (for single or multiple scales).
Args:
recalls(ndarray): shape (num_scales, num_dets) or (num_dets, )
precisions(ndarray): shape (num_scales, num_dets) or (num_dets, )
mode(str): 'area' or '11points', 'area' means calculating the area
recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )
precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )
mode (str): 'area' or '11points', 'area' means calculating the area
under precision-recall curve, '11points' means calculating
the average precision of recalls at [0, 0.1, ..., 1]
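# A minimal numpy sketch of the 'area' mode for a single scale (the usual
# monotone precision-envelope computation; illustrative, not this exact file):
#
#   import numpy as np
#   recalls = np.array([0.2, 0.4, 0.4, 0.8])
#   precisions = np.array([1.0, 0.8, 0.5, 0.4])
#   mrec = np.concatenate(([0.], recalls, [1.]))
#   mpre = np.concatenate(([0.], precisions, [0.]))
#   for i in range(mpre.size - 1, 0, -1):
#       mpre[i - 1] = max(mpre[i - 1], mpre[i])   # precision envelope
#   idx = np.where(mrec[1:] != mrec[:-1])[0]
#   ap = np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])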
@@ -60,11 +60,11 @@ def tpfp_imagenet(det_bboxes,
"""Check if detected bboxes are true positive or false positive.
Args:
det_bbox(ndarray): the detected bbox
gt_bboxes(ndarray): ground truth bboxes of this image
gt_ignore(ndarray): indicate if gts are ignored for evaluation or not
default_iou_thr(float): the iou thresholds for medium and large bboxes
area_ranges(list or None): gt bbox area ranges
det_bboxes (ndarray): the detected bboxes
gt_bboxes (ndarray): ground truth bboxes of this image
gt_ignore (ndarray): indicates whether each gt bbox is ignored for evaluation
default_iou_thr (float): the IoU threshold for medium and large bboxes
area_ranges (list or None): gt bbox area ranges
Returns:
tuple: two arrays (tp, fp) whose elements are 0 and 1
@@ -115,10 +115,10 @@ def tpfp_imagenet(det_bboxes,
max_iou = ious[i, j]
matched_gt = j
# there are 4 cases for a det bbox:
# 1. this det bbox matches a gt, tp = 1, fp = 0
# 2. this det bbox matches an ignored gt, tp = 0, fp = 0
# 3. this det bbox matches no gt and within area range, tp = 0, fp = 1
# 4. this det bbox matches no gt but is beyond area range, tp = 0, fp = 0
# 1. it matches a gt, tp = 1, fp = 0
# 2. it matches an ignored gt, tp = 0, fp = 0
# 3. it matches no gt and within area range, tp = 0, fp = 1
# 4. it matches no gt but is beyond area range, tp = 0, fp = 0
if matched_gt >= 0:
gt_covered[matched_gt] = 1
if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
@@ -137,10 +137,10 @@ def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None):
"""Check if detected bboxes are true positive or false positive.
Args:
det_bbox(ndarray): the detected bbox
gt_bboxes(ndarray): ground truth bboxes of this image
gt_ignore(ndarray): indicate if gts are ignored for evaluation or not
iou_thr(float): the iou thresholds
det_bboxes (ndarray): the detected bboxes
gt_bboxes (ndarray): ground truth bboxes of this image
gt_ignore (ndarray): indicates whether each gt bbox is ignored for evaluation
iou_thr (float): the IoU threshold
Returns:
tuple: (tp, fp), two arrays whose elements are 0 and 1
@@ -227,15 +227,16 @@ def eval_map(det_results,
"""Evaluate mAP of a dataset.
Args:
det_results(list): a list of list, [[cls1_det, cls2_det, ...], ...]
gt_bboxes(list): ground truth bboxes of each image, a list of K*4 array
gt_labels(list): ground truth labels of each image, a list of K array
gt_ignore(list): gt ignore indicators of each image, a list of K array
scale_ranges(list, optional): [(min1, max1), (min2, max2), ...]
iou_thr(float): IoU threshold
dataset(None or str): dataset name, there are minor differences in
det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...]
gt_bboxes (list): ground truth bboxes of each image, a list of K*4
array.
gt_labels (list): ground truth labels of each image, a list of K array
gt_ignore (list): gt ignore indicators of each image, a list of K array
scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
iou_thr (float): IoU threshold
dataset (None or str): dataset name, there are minor differences in
metrics for different datasets, e.g. "voc07", "imagenet_det", etc.
print_summary(bool): whether to print the mAP summary
print_summary (bool): whether to print the mAP summary
Returns:
tuple: (mAP, [dict, dict, ...])
@@ -265,7 +266,8 @@ def eval_map(det_results,
area_ranges) for j in range(len(cls_dets))
]
tp, fp = tuple(zip(*tpfp))
# calculate gt number of each scale, gts ignored or beyond scale are not counted
# calculate gt number of each scale, gts ignored or beyond scale
# are not counted
num_gts = np.zeros(num_scales, dtype=int)
for j, bbox in enumerate(cls_gts):
if area_ranges is None:
from .losses import (weighted_nll_loss, weighted_cross_entropy,
weighted_binary_cross_entropy, sigmoid_focal_loss,
weighted_sigmoid_focal_loss, mask_cross_entropy,
smooth_l1_loss, weighted_smoothl1, accuracy)
__all__ = [
'weighted_nll_loss', 'weighted_cross_entropy',
'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss',
'weighted_smoothl1', 'accuracy'
]
# TODO merge naive and weighted loss to one function.
# TODO merge naive and weighted loss.
import torch
import torch.nn.functional as F
from ..bbox_ops import bbox_transform_inv, bbox_overlaps
def weighted_nll_loss(pred, label, weight, avg_factor=None):
if avg_factor is None:
avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
raw = F.nll_loss(pred, label, reduction='none')
return torch.sum(raw * weight)[None] / avg_factor
def weighted_nll_loss(pred, label, weight, ave_factor=None):
if ave_factor is None:
ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
raw = F.nll_loss(pred, label, size_average=False, reduce=False)
return torch.sum(raw * weight)[None] / ave_factor
def weighted_cross_entropy(pred, label, weight, avg_factor=None):
if avg_factor is None:
avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
raw = F.cross_entropy(pred, label, reduction='none')
return torch.sum(raw * weight)[None] / avg_factor
def weighted_cross_entropy(pred, label, weight, ave_factor=None):
if ave_factor is None:
ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
raw = F.cross_entropy(pred, label, size_average=False, reduce=False)
return torch.sum(raw * weight)[None] / ave_factor
def weighted_binary_cross_entropy(pred, label, weight, ave_factor=None):
if ave_factor is None:
ave_factor = max(torch.sum(weight > 0).float().item(), 1.)
def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
if avg_factor is None:
avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
return F.binary_cross_entropy_with_logits(
pred, label.float(), weight.float(),
size_average=False)[None] / ave_factor
reduction='sum')[None] / avg_factor
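# The avg_factor convention shared by these losses: sum the loss over all
# elements, then divide by the count of positive-weight samples (clamped to
# at least 1) instead of the tensor size. A hedged toy check:
#
#   import torch
#   pred = torch.tensor([2.0, -1.0, 0.5])
#   label = torch.tensor([1.0, 0.0, 1.0])
#   weight = torch.tensor([1.0, 1.0, 0.0])   # third sample is ignored
#   loss = weighted_binary_cross_entropy(pred, label, weight)
#   # BCE summed over the first two samples, divided by avg_factor = 2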
def sigmoid_focal_loss(pred,
@@ -32,13 +30,13 @@ def sigmoid_focal_loss(pred,
weight,
gamma=2.0,
alpha=0.25,
size_average=True):
reduction='elementwise_mean'):
pred_sigmoid = pred.sigmoid()
pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
weight = weight * pt.pow(gamma)
return F.binary_cross_entropy_with_logits(
pred, target, weight, size_average=size_average)
pred, target, weight, reduction=reduction)
def weighted_sigmoid_focal_loss(pred,
@@ -46,13 +44,13 @@ def weighted_sigmoid_focal_loss(pred,
weight,
gamma=2.0,
alpha=0.25,
ave_factor=None,
avg_factor=None,
num_classes=80):
if ave_factor is None:
ave_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
if avg_factor is None:
avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
return sigmoid_focal_loss(
pred, target, weight, gamma=gamma, alpha=alpha,
size_average=False)[None] / ave_factor
reduction='sum')[None] / avg_factor
def mask_cross_entropy(pred, target, label):
@@ -60,37 +58,30 @@ def mask_cross_entropy(pred, target, label):
inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
pred_slice = pred[inds, label].squeeze(1)
return F.binary_cross_entropy_with_logits(
pred_slice, target, size_average=True)[None]
def weighted_mask_cross_entropy(pred, target, weight, label):
num_rois = pred.size()[0]
num_samples = torch.sum(weight > 0).float().item() + 1e-6
assert num_samples >= 1
inds = torch.arange(0, num_rois).long().cuda()
pred_slice = pred[inds, label].squeeze(1)
return F.binary_cross_entropy_with_logits(
pred_slice, target, weight, size_average=False)[None] / num_samples
pred_slice, target, reduction='elementwise_mean')[None]
def smooth_l1_loss(pred, target, beta=1.0, size_average=True, reduce=True):
def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'):
assert beta > 0
assert pred.size() == target.size() and target.numel() > 0
diff = torch.abs(pred - target)
loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
diff - 0.5 * beta)
if size_average:
loss /= pred.numel()
if reduce:
loss = loss.sum()
return loss
def weighted_smoothl1(pred, target, weight, beta=1.0, ave_factor=None):
if ave_factor is None:
ave_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
loss = smooth_l1_loss(pred, target, beta, size_average=False, reduce=False)
return torch.sum(loss * weight)[None] / ave_factor
reduction = F._Reduction.get_enum(reduction)
# none: 0, elementwise_mean:1, sum: 2
if reduction == 0:
return loss
elif reduction == 1:
return loss.sum() / pred.numel()
elif reduction == 2:
return loss.sum()
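# Numeric sanity check for the two branches above (beta = 1.0):
#   diff = 0.5 -> 0.5 * 0.5 * 0.5 / 1.0 = 0.125   (quadratic branch)
#   diff = 2.0 -> 2.0 - 0.5 * 1.0       = 1.5     (linear branch)
# With reduction='elementwise_mean', the per-element losses are summed and
# divided by pred.numel().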
def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
if avg_factor is None:
avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
loss = smooth_l1_loss(pred, target, beta, reduction='none')
return torch.sum(loss * weight)[None] / avg_factor
def accuracy(pred, target, topk=1):
from .losses import (
weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,
sigmoid_focal_loss, weighted_sigmoid_focal_loss, mask_cross_entropy,
weighted_mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy)
__all__ = [
'weighted_nll_loss', 'weighted_cross_entropy',
'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
'weighted_sigmoid_focal_loss', 'mask_cross_entropy',
'weighted_mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1',
'accuracy'
]
from .utils import split_combined_polys
from .mask_target import mask_target
__all__ = ['split_combined_polys', 'mask_target']
import torch
import numpy as np
import mmcv
from .segms import polys_to_mask_wrt_box
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list,
img_meta, cfg):
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
cfg):
cfg_list = [cfg for _ in range(len(pos_proposals_list))]
img_metas = [img_meta for _ in range(len(pos_proposals_list))]
mask_targets = map(mask_target_single, pos_proposals_list,
pos_assigned_gt_inds_list, gt_polys_list, img_metas,
cfg_list)
mask_targets = torch.cat(tuple(mask_targets), dim=0)
pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
mask_targets = torch.cat(list(mask_targets))
return mask_targets
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys,
img_meta, cfg):
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
mask_size = cfg.mask_size
num_pos = pos_proposals.size(0)
mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size))
mask_targets = []
if num_pos > 0:
pos_proposals = pos_proposals.cpu().numpy()
proposals_np = pos_proposals.cpu().numpy()
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
scale_factor = img_meta['scale_factor'][0].cpu().numpy()
for i in range(num_pos):
bbox = pos_proposals[i, :] / scale_factor
polys = gt_polys[pos_assigned_gt_inds[i]]
mask = polys_to_mask_wrt_box(polys, bbox, mask_size)
mask = np.array(mask > 0, dtype=np.float32)
mask_targets[i, ...] = torch.from_numpy(mask).to(
mask_targets.device)
gt_mask = gt_masks[pos_assigned_gt_inds[i]]
bbox = proposals_np[i, :].astype(np.int32)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1 + 1, 1)
h = np.maximum(y2 - y1 + 1, 1)
# mask is uint8 both before and after resizing
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
(mask_size, mask_size))
mask_targets.append(target)
mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
pos_proposals.device)
else:
mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
return mask_targets
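# Hedged usage sketch with toy shapes (not from a real model run):
#
#   import numpy as np
#   import torch
#   from mmcv import Config
#   props = torch.tensor([[4., 4., 19., 19.]])        # one positive proposal
#   inds = torch.tensor([0])                          # its assigned gt index
#   gt_masks = [np.zeros((32, 32), dtype=np.uint8)]   # one binary gt mask
#   cfg = Config(dict(mask_size=28))
#   targets = mask_target_single(props, inds, gt_masks, cfg)
#   # targets: float tensor of shape (1, 28, 28); the gt mask region
#   # [4:20, 4:20] is cropped and resized to 28 x 28 via mmcv.imresize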
import cvbase as cvb
import numpy as np
import pycocotools.mask as mask_utils
import mmcv
def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
def split_combined_polys(polys, poly_lens, polys_per_mask):
"""Split the combined 1-D polys into masks.
A mask is represented as a list of polys, and a poly is represented as
@@ -13,9 +9,9 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
tensor. Here we need to split the tensor into original representations.
Args:
gt_polys (list): a list (length = image num) of 1-D tensors
gt_poly_lens (list): a list (length = image num) of poly length
num_polys_per_mask (list): a list (length = image num) of poly number
polys (list): a list (length = image num) of 1-D tensors
poly_lens (list): a list (length = image num) of poly length
polys_per_mask (list): a list (length = image num) of poly number
of each mask
Returns:
@@ -23,13 +19,12 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
list (length = poly num) of numpy array
"""
mask_polys_list = []
for img_id in range(len(gt_polys)):
gt_polys_single = gt_polys[img_id].cpu().numpy()
gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist()
num_polys_per_mask_single = num_polys_per_mask[
img_id].cpu().numpy().tolist()
for img_id in range(len(polys)):
polys_single = polys[img_id]
polys_lens_single = poly_lens[img_id].tolist()
polys_per_mask_single = polys_per_mask[img_id].tolist()
split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single)
mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single)
split_polys = mmcv.slice_list(polys_single, polys_lens_single)
mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
mask_polys_list.append(mask_polys)
return mask_polys_list
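# mmcv.slice_list splits a flat list into sublists by lengths, e.g.
#   mmcv.slice_list([1, 2, 3, 4, 5, 6], [2, 4]) -> [[1, 2], [3, 4, 5, 6]]
# It is applied twice above: first to cut the flat coordinates into polys,
# then to group those polys per mask.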
from .segms import (flip_segms, polys_to_mask, mask_to_bbox,
polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting,
rle_mask_nms, rle_masks_to_boxes)
from .utils import split_combined_gt_polys
from .mask_target import mask_target
__all__ = [
'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box',
'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes',
'split_combined_gt_polys', 'mask_target'
]
# This file is copied from Detectron.
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Functions for interacting with segmentation masks in the COCO format.
The following terms are used in this module
mask: a binary mask encoded as a 2D numpy array
segm: a segmentation mask in one of the two COCO formats (polygon or RLE)
polygon: COCO's polygon format
RLE: COCO's run length encoding format
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import pycocotools.mask as mask_util
def flip_segms(segms, height, width):
"""Left/right flip each mask in a list of masks."""
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
# Magic RLE format handling painfully discovered by looking at the
# COCO API showAnns function.
rle = mask_util.frPyObjects([rle], height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1, :]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if type(segm) == list:
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
assert type(segm) == dict
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
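# Flip arithmetic sketch: for a polygon in a width-10 image, each x coordinate
# maps to width - x - 1 (so x = 2 -> 7 and x = 7 -> 2) while y values are
# untouched; RLE masks are decoded, flipped along axis 1, and re-encoded.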
def polys_to_mask(polygons, height, width):
"""Convert from the COCO polygon segmentation format to a binary mask
encoded as a 2D array of data type numpy.float32. The polygon segmentation
is understood to be enclosed inside a height x width image. The resulting
mask is therefore of shape (height, width).
"""
rle = mask_util.frPyObjects(polygons, height, width)
mask = np.array(mask_util.decode(rle), dtype=np.float32)
# Flatten in case polygons was a list
mask = np.sum(mask, axis=2)
mask = np.array(mask > 0, dtype=np.float32)
return mask
def mask_to_bbox(mask):
"""Compute the tight bounding box of a binary mask."""
xs = np.where(np.sum(mask, axis=0) > 0)[0]
ys = np.where(np.sum(mask, axis=1) > 0)[0]
if len(xs) == 0 or len(ys) == 0:
return None
x0 = xs[0]
x1 = xs[-1]
y0 = ys[0]
y1 = ys[-1]
return np.array((x0, y0, x1, y1), dtype=np.float32)
def polys_to_mask_wrt_box(polygons, box, M):
"""Convert from the COCO polygon segmentation format to a binary mask
encoded as a 2D array of data type numpy.float32. The polygon segmentation
is understood to be enclosed in the given box and rasterized to an M x M
mask. The resulting mask is therefore of shape (M, M).
"""
w = box[2] - box[0]
h = box[3] - box[1]
w = np.maximum(w, 1)
h = np.maximum(h, 1)
polygons_norm = []
for poly in polygons:
p = np.array(poly, dtype=np.float32)
p[0::2] = (p[0::2] - box[0]) * M / w
p[1::2] = (p[1::2] - box[1]) * M / h
polygons_norm.append(p)
rle = mask_util.frPyObjects(polygons_norm, M, M)
mask = np.array(mask_util.decode(rle), dtype=np.float32)
# Flatten in case polygons was a list
mask = np.sum(mask, axis=2)
mask = np.array(mask > 0, dtype=np.float32)
return mask
def polys_to_boxes(polys):
"""Convert a list of polygons into an array of tight bounding boxes."""
boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
for i in range(len(polys)):
poly = polys[i]
x0 = min(min(p[::2]) for p in poly)
x1 = max(max(p[::2]) for p in poly)
y0 = min(min(p[1::2]) for p in poly)
y1 = max(max(p[1::2]) for p in poly)
boxes_from_polys[i, :] = [x0, y0, x1, y1]
return boxes_from_polys
def rle_mask_voting(top_masks,
all_masks,
all_dets,
iou_thresh,
binarize_thresh,
method='AVG'):
"""Returns new masks (in correspondence with `top_masks`) by combining
multiple overlapping masks coming from the pool of `all_masks`. Two methods
for combining masks are supported: 'AVG' uses a weighted average of
overlapping mask pixels; 'UNION' takes the union of all mask pixels.
"""
if len(top_masks) == 0:
return
all_not_crowd = [False] * len(all_masks)
top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
decoded_all_masks = [
np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
]
decoded_top_masks = [
np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
]
all_boxes = all_dets[:, :4].astype(np.int32)
all_scores = all_dets[:, 4]
# Fill box support with weights
mask_shape = decoded_all_masks[0].shape
mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
for k in range(len(all_masks)):
ref_box = all_boxes[k]
x_0 = max(ref_box[0], 0)
x_1 = min(ref_box[2] + 1, mask_shape[1])
y_0 = max(ref_box[1], 0)
y_1 = min(ref_box[3] + 1, mask_shape[0])
mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
mask_weights = np.maximum(mask_weights, 1e-5)
top_segms_out = []
for k in range(len(top_masks)):
# Corner case of empty mask
if decoded_top_masks[k].sum() == 0:
top_segms_out.append(top_masks[k])
continue
inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
# Only matches itself
if len(inds_to_vote) == 1:
top_segms_out.append(top_masks[k])
continue
masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
if method == 'AVG':
ws = mask_weights[inds_to_vote]
soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
elif method == 'UNION':
# Any pixel that's on joins the mask
soft_mask = np.sum(masks_to_vote, axis=0)
mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
else:
raise NotImplementedError('Method {} is unknown'.format(method))
rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
top_segms_out.append(rle)
return top_segms_out
def rle_mask_nms(masks, dets, thresh, mode='IOU'):
"""Performs greedy non-maximum suppression based on an overlap measurement
between masks. The type of measurement is determined by `mode` and can be
either 'IOU' (standard intersection over union) or 'IOMA' (intersection over
minimum area).
"""
if len(masks) == 0:
return []
if len(masks) == 1:
return [0]
if mode == 'IOU':
# Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))
all_not_crowds = [False] * len(masks)
ious = mask_util.iou(masks, masks, all_not_crowds)
elif mode == 'IOMA':
# Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2))
all_crowds = [True] * len(masks)
# ious[m1, m2] = area(intersect(m1, m2)) / area(m2)
ious = mask_util.iou(masks, masks, all_crowds)
# ... = max(area(intersect(m1, m2)) / area(m2),
# area(intersect(m2, m1)) / area(m1))
ious = np.maximum(ious, ious.transpose())
elif mode == 'CONTAINMENT':
# Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2)
# Which measures how much m2 is contained inside m1
all_crowds = [True] * len(masks)
ious = mask_util.iou(masks, masks, all_crowds)
else:
raise NotImplementedError('Mode {} is unknown'.format(mode))
scores = dets[:, 4]
order = np.argsort(-scores)
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
ovr = ious[i, order[1:]]
inds_to_keep = np.where(ovr <= thresh)[0]
order = order[inds_to_keep + 1]
return keep
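# Self-contained toy run of the greedy keep loop above, assuming a
# precomputed overlap matrix (numpy only, no RLE decoding):
#
#   import numpy as np
#   ious = np.array([[1.0, 0.6, 0.1],
#                    [0.6, 1.0, 0.2],
#                    [0.1, 0.2, 1.0]])
#   scores = np.array([0.9, 0.8, 0.7])
#   order, keep, thresh = np.argsort(-scores), [], 0.5
#   while order.size > 0:
#       i = order[0]
#       keep.append(i)
#       order = order[np.where(ious[i, order[1:]] <= thresh)[0] + 1]
#   # keep == [0, 2]: mask 1 is suppressed by mask 0 (overlap 0.6 > 0.5)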
def rle_masks_to_boxes(masks):
"""Computes the bounding box of each mask in a list of RLE encoded masks."""
if len(masks) == 0:
return []
decoded_masks = [
np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
]
def get_bounds(flat_mask):
inds = np.where(flat_mask > 0)[0]
return inds.min(), inds.max()
boxes = np.zeros((len(decoded_masks), 4))
keep = [True] * len(decoded_masks)
for i, mask in enumerate(decoded_masks):
if mask.sum() == 0:
keep[i] = False
continue
flat_mask = mask.sum(axis=0)
x0, x1 = get_bounds(flat_mask)
flat_mask = mask.sum(axis=1)
y0, y1 = get_bounds(flat_mask)
boxes[i, :] = (x0, y0, x1, y1)
return boxes, np.where(keep)[0]
import torch
from mmdet.ops import nms
import numpy as np
from ..bbox_ops import bbox_mapping_back
from mmdet.ops import nms
from ..bbox import bbox_mapping_back
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
@@ -21,11 +21,12 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
"""
recovered_proposals = []
for proposals, img_info in zip(aug_proposals, img_metas):
shape_scale = img_info['shape_scale'][0]
flip = img_info['flip'][0]
img_shape = img_info['img_shape']
scale_factor = img_info['scale_factor']
flip = img_info['flip']
_proposals = proposals.clone()
_proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale,
flip)
_proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
scale_factor, flip)
recovered_proposals.append(_proposals)
aug_proposals = torch.cat(recovered_proposals, dim=0)
nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr,
@@ -53,9 +54,10 @@ def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
"""
recovered_bboxes = []
for bboxes, img_info in zip(aug_bboxes, img_metas):
shape_scale = img_info['shape_scale'][0]
flip = img_info['flip'][0]
bboxes = bbox_mapping_back(bboxes, shape_scale, flip)
img_shape = img_info[0]['img_shape']
scale_factor = img_info[0]['scale_factor']
flip = img_info[0]['flip']
bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
recovered_bboxes.append(bboxes)
bboxes = torch.stack(recovered_bboxes).mean(dim=0)
if aug_scores is None:
@@ -73,7 +75,7 @@ def merge_aug_scores(aug_scores):
return np.mean(aug_scores, axis=0)
def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
"""Merge augmented mask prediction.
Args:
@@ -85,7 +87,7 @@ def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
tuple: (bboxes, scores)
"""
recovered_masks = [
mask if not img_info['flip'][0] else mask[..., ::-1]
mask if not img_info[0]['flip'] else mask[..., ::-1]
for mask, img_info in zip(aug_masks, img_metas)
]
if weights is None:
from .anchor_generator import *
from .anchor_target import *
from mmdet.datasets import collate
from mmdet.nn.parallel import scatter
__all__ = ['_data_func']
def _data_func(data, gpu_id):
imgs, img_metas = tuple(
scatter(collate([data], samples_per_gpu=1), [gpu_id])[0])
return dict(
img=imgs,
img_meta=img_metas,
return_loss=False,
return_bboxes=True,
rescale=True)
import numpy as np
import torch
from collections import OrderedDict
from mmdet.nn.parallel import scatter
def parse_losses(losses):
log_vars = OrderedDict()
for loss_key, loss_value in losses.items():
if isinstance(loss_value, dict):
for _key, _value in loss_value.items():
if isinstance(_value, list):
_value = sum([_loss.mean() for _loss in _value])
else:
_value = _value.mean()
log_vars[_key] = _value
elif isinstance(loss_value, list):
log_vars[loss_key] = sum(_loss.mean() for _loss in loss_value)
else:
log_vars[loss_key] = loss_value.mean()
loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
log_vars['loss'] = loss
for _key, _value in log_vars.items():
log_vars[_key] = _value.item()
return loss, log_vars
def batch_processor(model, data, train_mode, args=None):
data = scatter(data, [torch.cuda.current_device()])[0]
losses = model(**data)
loss, log_vars = parse_losses(losses)
outputs = dict(
loss=loss / args.world_size,
log_vars=log_vars,
num_samples=len(data['img'].data))
return outputs
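# Hedged sketch of parse_losses on a typical losses dict (keys illustrative;
# every key containing 'loss' is summed into the total):
#
#   import torch
#   losses = dict(
#       loss_cls=torch.tensor([0.5]),
#       loss_bbox=[torch.tensor([0.2]), torch.tensor([0.1])],
#       acc=torch.tensor([88.0]))
#   loss, log_vars = parse_losses(losses)
#   # loss == 0.8 (= 0.5 + 0.2 + 0.1); 'acc' is logged in log_vars but
#   # excluded from the total because its key does not contain 'loss'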
from .dist_utils import *
from .hooks import *
from .misc import *
from .dist_utils import init_dist, allreduce_grads, DistOptimizerHook
from .misc import tensor2imgs, unmap, multi_apply
__all__ = [
'init_dist', 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs',
'unmap', 'multi_apply'
]
import os
from collections import OrderedDict
import torch
import torch.multiprocessing as mp
import torch.distributed as dist
from torch.nn.utils import clip_grad
from mmcv.torchpack import Hook, OptimizerStepperHook
__all__ = [
'init_dist', 'average_gradients', 'broadcast_params',
'DistOptimizerStepperHook', 'DistSamplerSeedHook'
]
from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
_take_tensors)
from mmcv.runner import OptimizerHook
def init_dist(world_size,
rank,
backend='gloo',
master_ip='127.0.0.1',
port=29500):
def init_dist(launcher, backend='nccl', **kwargs):
if mp.get_start_method(allow_none=True) is None:
mp.set_start_method('spawn')
if launcher == 'pytorch':
_init_dist_pytorch(backend, **kwargs)
elif launcher == 'mpi':
_init_dist_mpi(backend, **kwargs)
elif launcher == 'slurm':
_init_dist_slurm(backend, **kwargs)
else:
raise ValueError('Invalid launcher type: {}'.format(launcher))
def _init_dist_pytorch(backend, **kwargs):
# TODO: use local_rank instead of rank % num_gpus
rank = int(os.environ['RANK'])
num_gpus = torch.cuda.device_count()
torch.cuda.set_device(rank % num_gpus)
os.environ['MASTER_ADDR'] = master_ip
os.environ['MASTER_PORT'] = str(port)
if backend == 'nccl':
dist.init_process_group(backend='nccl')
dist.init_process_group(backend=backend, **kwargs)
def _init_dist_mpi(backend, **kwargs):
raise NotImplementedError
def _init_dist_slurm(backend, **kwargs):
raise NotImplementedError
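# With launcher='pytorch', initialization is driven by the environment
# variables that torch.distributed.launch sets per process (RANK, WORLD_SIZE,
# MASTER_ADDR, MASTER_PORT); _init_dist_pytorch only reads RANK to pick a GPU
# before dist.init_process_group. A hedged launch example (the script name
# and flag are illustrative):
#
#   python -m torch.distributed.launch --nproc_per_node=8 train.py --launcher pytorch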
def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
if bucket_size_mb > 0:
bucket_size_bytes = bucket_size_mb * 1024 * 1024
buckets = _take_tensors(tensors, bucket_size_bytes)
else:
dist.init_process_group(
backend='gloo', rank=rank, world_size=world_size)
buckets = OrderedDict()
for tensor in tensors:
tp = tensor.type()
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(tensor)
buckets = buckets.values()
for bucket in buckets:
flat_tensors = _flatten_dense_tensors(bucket)
dist.all_reduce(flat_tensors)
flat_tensors.div_(world_size)
for tensor, synced in zip(
bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
tensor.copy_(synced)
def average_gradients(model):
for param in model.parameters():
if param.requires_grad and not (param.grad is None):
dist.all_reduce(param.grad.data)
def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
grads = [
param.grad.data for param in model.parameters()
if param.requires_grad and param.grad is not None
]
world_size = dist.get_world_size()
if coalesce:
_allreduce_coalesced(grads, world_size, bucket_size_mb)
else:
for tensor in grads:
dist.all_reduce(tensor.div_(world_size))
def broadcast_params(model):
for p in model.state_dict().values():
dist.broadcast(p, 0)
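# Typical placement of these helpers in a manual training step (sketch,
# assuming torch.distributed is initialized and model/optimizer exist):
#
#   optimizer.zero_grad()
#   loss.backward()
#   allreduce_grads(model, coalesce=True, bucket_size_mb=-1)
#   optimizer.step()
#
# Coalescing flattens same-dtype grads into buckets so fewer, larger
# all_reduce calls are issued; dividing each bucket by world_size turns the
# reduced sum into a mean.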
class DistOptimizerHook(OptimizerHook):
class DistOptimizerStepperHook(OptimizerStepperHook):
def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
def after_train_iter(self, runner):
runner.optimizer.zero_grad()
runner.outputs['loss'].backward()
average_gradients(runner.model)
if self.grad_clip:
clip_grad.clip_grad_norm_(
filter(lambda p: p.requires_grad, runner.model.parameters()),
max_norm=self.max_norm,
norm_type=self.norm_type)
allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
if self.grad_clip is not None:
self.clip_grads(runner.model.parameters())
runner.optimizer.step()
class DistSamplerSeedHook(Hook):
def before_epoch(self, runner):
runner.data_loader.sampler.set_epoch(runner.epoch)
import subprocess
from functools import partial
import mmcv
import numpy as np
import torch
from six.moves import map, zip
__all__ = ['tensor2imgs', 'unique', 'unmap', 'results2json']
def tensor2imgs(tensor,
color_order='RGB',
color_mean=(0.485, 0.456, 0.406),
color_std=(0.229, 0.224, 0.225)):
assert color_order in ['RGB', 'BGR']
img_per_gpu = tensor.size(0)
color_mean = np.array(color_mean, dtype=np.float32)
color_std = np.array(color_std, dtype=np.float32)
def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
num_imgs = tensor.size(0)
mean = np.array(mean, dtype=np.float32)
std = np.array(std, dtype=np.float32)
imgs = []
for img_id in range(img_per_gpu):
for img_id in range(num_imgs):
img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
if color_order == 'RGB':
img = mmcv.rgb2bgr(img)
img = img * color_std + color_mean
img = mmcv.imdenormalize(
img, mean, std, to_bgr=to_rgb).astype(np.uint8)
imgs.append(np.ascontiguousarray(img))
return imgs
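# Hedged usage sketch: recover displayable uint8 BGR images from a normalized
# NCHW batch, using the same mean/std applied at load time (values below are
# the common ImageNet statistics, shown only as an example):
#
#   import torch
#   batch = torch.randn(2, 3, 32, 32)
#   imgs = tensor2imgs(batch, mean=(123.675, 116.28, 103.53),
#                      std=(58.395, 57.12, 57.375), to_rgb=True)
#   # imgs: list of 2 HWC uint8 arrays, denormalized and converted to BGR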
def unique(tensor):
if tensor.is_cuda:
u_tensor = np.unique(tensor.cpu().numpy())
return tensor.new_tensor(u_tensor)
else:
return torch.unique(tensor)
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
def unmap(data, count, inds, fill=0):
@@ -44,75 +35,3 @@ def unmap(data, count, inds, fill=0):
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def results2json(dataset, results, out_file):
if isinstance(results[0], list):
json_results = det2json(dataset, results)
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
else:
raise TypeError('invalid type of results')
mmcv.dump(json_results, out_file)
from .coco import CocoDataset
from .collate import *
from .sampler import *
from .transforms import *
from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann
__all__ = [
'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'to_tensor', 'random_scale', 'show_ann'
]
@@ -2,75 +2,17 @@ import os.path as osp
import mmcv
import numpy as np
from mmcv.parallel import DataContainer as DC
from pycocotools.coco import COCO
from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform,
from .transforms import (ImageTransform, BboxTransform, MaskTransform,
Numpy2Tensor)
from .utils import show_ann, random_scale
from .utils import DataContainer as DC
def parse_ann_info(ann_info, cat2label, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
cat2label (dict): The mapping from category ids to labels.
with_mask (bool): Whether to parse mask annotations.
Returns:
tuple: gt_bboxes, gt_labels and gt_mask_info
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# each mask consists of one or several polys, each poly is a list of float.
if with_mask:
gt_mask_polys = []
gt_poly_lens = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(cat2label[ann['category_id']])
if with_mask:
# Note polys are not resized
mask_polys = [
p for p in ann['segmentation'] if len(p) >= 6
] # valid polygons have >= 3 points (6 coordinates)
poly_lens = [len(p) for p in mask_polys]
gt_mask_polys.append(mask_polys)
gt_poly_lens.extend(poly_lens)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
from .utils import to_tensor, show_ann, random_scale
class CocoDataset(Dataset):
def __init__(self,
ann_file,
img_prefix,
@@ -137,7 +79,7 @@ class CocoDataset(Dataset):
self.img_transform = ImageTransform(
size_divisor=self.size_divisor, **self.img_norm_cfg)
self.bbox_transform = BboxTransform()
self.mask_transform = PolyMaskTransform()
self.mask_transform = MaskTransform()
self.numpy2tensor = Numpy2Tensor()
def __len__(self):
@@ -161,6 +103,70 @@ class CocoDataset(Dataset):
ann_info = self.coco.loadAnns(ann_ids)
return ann_info
def _parse_ann_info(self, ann_info, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, mask_polys, poly_lens.
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# Two formats are provided.
# 1. mask: a binary map of the same size of the image.
# 2. polys: each mask consists of one or several polys, each poly is a
# list of float.
if with_mask:
gt_masks = []
gt_mask_polys = []
gt_poly_lens = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
if with_mask:
gt_masks.append(self.coco.annToMask(ann))
mask_polys = [
p for p in ann['segmentation'] if len(p) >= 6
] # valid polygons have >= 3 points (6 coordinates)
poly_lens = [len(p) for p in mask_polys]
gt_mask_polys.append(mask_polys)
gt_poly_lens.extend(poly_lens)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['masks'] = gt_masks
# poly format is not used in the current implementation
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
@@ -199,7 +205,7 @@ class CocoDataset(Dataset):
idx = self._rand_another(idx)
continue
ann = parse_ann_info(ann_info, self.cat2label, self.with_mask)
ann = self._parse_ann_info(ann_info, self.with_mask)
gt_bboxes = ann['bboxes']
gt_labels = ann['labels']
gt_bboxes_ignore = ann['bboxes_ignore']
@@ -211,7 +217,7 @@ class CocoDataset(Dataset):
# apply transforms
flip = True if np.random.rand() < self.flip_ratio else False
img_scale = random_scale(self.img_scales) # sample a scale
img, img_shape, scale_factor = self.img_transform(
img, img_shape, pad_shape, scale_factor = self.img_transform(
img, img_scale, flip)
if self.proposals is not None:
proposals = self.bbox_transform(proposals, img_shape,
@@ -222,32 +228,29 @@ class CocoDataset(Dataset):
scale_factor, flip)
if self.with_mask:
gt_mask_polys, gt_poly_lens, num_polys_per_mask = \
self.mask_transform(
ann['mask_polys'], ann['poly_lens'],
img_info['height'], img_info['width'], flip)
gt_masks = self.mask_transform(ann['masks'], pad_shape,
scale_factor, flip)
ori_shape = (img_info['height'], img_info['width'])
ori_shape = (img_info['height'], img_info['width'], 3)
img_meta = dict(
ori_shape=DC(ori_shape),
img_shape=DC(img_shape),
scale_factor=DC(scale_factor),
flip=DC(flip))
ori_shape=ori_shape,
img_shape=img_shape,
pad_shape=pad_shape,
scale_factor=scale_factor,
flip=flip)
data = dict(
img=DC(img, stack=True),
img_meta=img_meta,
gt_bboxes=DC(gt_bboxes))
img=DC(to_tensor(img), stack=True),
img_meta=DC(img_meta, cpu_only=True),
gt_bboxes=DC(to_tensor(gt_bboxes)))
if self.proposals is not None:
data['proposals'] = DC(proposals)
data['proposals'] = DC(to_tensor(proposals))
if self.with_label:
data['gt_labels'] = DC(gt_labels)
data['gt_labels'] = DC(to_tensor(gt_labels))
if self.with_crowd:
data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore)
data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
if self.with_mask:
data['gt_mask_polys'] = DC(gt_mask_polys)
data['gt_poly_lens'] = DC(gt_poly_lens)
data['num_polys_per_mask'] = DC(num_polys_per_mask)
data['gt_masks'] = DC(gt_masks, cpu_only=True)
return data
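# Note on the DataContainer (DC) flags above: stack=True lets mmcv's collate
# batch images into one padded NCHW tensor, cpu_only=True keeps metas and
# masks on CPU during scatter, and plain DC(tensor) items are collected into
# per-sample lists. A hedged sketch:
#
#   from mmcv.parallel import collate
#   batch = collate([data_a, data_b], samples_per_gpu=2)
#   # batch['img'].data -> [one stacked tensor of shape (2, 3, H, W)]
#   # batch['img_meta'].data -> [[meta_a, meta_b]] (kept on CPU)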
def prepare_test_img(self, idx):
@@ -258,37 +261,38 @@ class CocoDataset(Dataset):
if self.proposals is not None else None)
def prepare_single(img, scale, flip, proposal=None):
_img, _img_shape, _scale_factor = self.img_transform(
_img, img_shape, pad_shape, scale_factor = self.img_transform(
img, scale, flip)
img, img_shape, scale_factor = self.numpy2tensor(
_img, _img_shape, _scale_factor)
ori_shape = (img_info['height'], img_info['width'])
img_meta = dict(
ori_shape=ori_shape,
_img = to_tensor(_img)
_img_meta = dict(
ori_shape=(img_info['height'], img_info['width'], 3),
img_shape=img_shape,
pad_shape=pad_shape,
scale_factor=scale_factor,
flip=flip)
if proposal is not None:
proposal = self.bbox_transform(proposal, _scale_factor, flip)
proposal = self.numpy2tensor(proposal)
return img, img_meta, proposal
_proposal = self.bbox_transform(proposal, scale_factor, flip)
_proposal = to_tensor(_proposal)
else:
_proposal = None
return _img, _img_meta, _proposal
imgs = []
img_metas = []
proposals = []
for scale in self.img_scales:
img, img_meta, proposal = prepare_single(img, scale, False,
proposal)
imgs.append(img)
img_metas.append(img_meta)
proposals.append(proposal)
_img, _img_meta, _proposal = prepare_single(
img, scale, False, proposal)
imgs.append(_img)
img_metas.append(DC(_img_meta, cpu_only=True))
proposals.append(_proposal)
if self.flip_ratio > 0:
img, img_meta, prop = prepare_single(img, scale, True,
proposal)
imgs.append(img)
img_metas.append(img_meta)
proposals.append(prop)
if self.proposals is None:
return imgs, img_metas
else:
return imgs, img_metas, proposals
_img, _img_meta, _proposal = prepare_single(
img, scale, True, proposal)
imgs.append(_img)
img_metas.append(DC(_img_meta, cpu_only=True))
proposals.append(_proposal)
data = dict(img=imgs, img_meta=img_metas)
if self.proposals is not None:
data['proposals'] = proposals
return data