Commit 57f6da5c authored by bailuo's avatar bailuo
Browse files

readme

parents
from .decorators import auto_fp16, force_fp32
from .hooks import Fp16OptimizerHook, wrap_fp16_model
__all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
import functools
from inspect import getfullargspec
import torch
from .utils import cast_tensor_type
def auto_fp16(apply_to=None, out_fp32=False):
"""Decorator to enable fp16 training automatically.
This decorator is useful when you write custom modules and want to support
mixed precision training. If inputs arguments are fp32 tensors, they will
be converted to fp16 automatically. Arguments other than fp32 tensors are
ignored.
Args:
apply_to (Iterable, optional): The argument names to be converted.
`None` indicates all arguments.
out_fp32 (bool): Whether to convert the output back to fp32.
:Example:
class MyModule1(nn.Module)
# Convert x and y to fp16
@auto_fp16()
def forward(self, x, y):
pass
class MyModule2(nn.Module):
# convert pred to fp16
@auto_fp16(apply_to=('pred', ))
def do_something(self, pred, others):
pass
"""
def auto_fp16_wrapper(old_func):
@functools.wraps(old_func)
def new_func(*args, **kwargs):
# check if the module has set the attribute `fp16_enabled`, if not,
# just fallback to the original method.
if not isinstance(args[0], torch.nn.Module):
raise TypeError('@auto_fp16 can only be used to decorate the '
'method of nn.Module')
if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
return old_func(*args, **kwargs)
# get the arg spec of the decorated method
args_info = getfullargspec(old_func)
# get the argument names to be casted
args_to_cast = args_info.args if apply_to is None else apply_to
# convert the args that need to be processed
new_args = []
# NOTE: default args are not taken into consideration
if args:
arg_names = args_info.args[:len(args)]
for i, arg_name in enumerate(arg_names):
if arg_name in args_to_cast:
new_args.append(
cast_tensor_type(args[i], torch.float, torch.half))
else:
new_args.append(args[i])
# convert the kwargs that need to be processed
new_kwargs = {}
if kwargs:
for arg_name, arg_value in kwargs.items():
if arg_name in args_to_cast:
new_kwargs[arg_name] = cast_tensor_type(
arg_value, torch.float, torch.half)
else:
new_kwargs[arg_name] = arg_value
# apply converted arguments to the decorated method
output = old_func(*new_args, **new_kwargs)
# cast the results back to fp32 if necessary
if out_fp32:
output = cast_tensor_type(output, torch.half, torch.float)
return output
return new_func
return auto_fp16_wrapper
def force_fp32(apply_to=None, out_fp16=False):
"""Decorator to convert input arguments to fp32 in force.
This decorator is useful when you write custom modules and want to support
mixed precision training. If there are some inputs that must be processed
in fp32 mode, then this decorator can handle it. If inputs arguments are
fp16 tensors, they will be converted to fp32 automatically. Arguments other
than fp16 tensors are ignored.
Args:
apply_to (Iterable, optional): The argument names to be converted.
`None` indicates all arguments.
out_fp16 (bool): Whether to convert the output back to fp16.
:Example:
class MyModule1(nn.Module)
# Convert x and y to fp32
@force_fp32()
def loss(self, x, y):
pass
class MyModule2(nn.Module):
# convert pred to fp32
@force_fp32(apply_to=('pred', ))
def post_process(self, pred, others):
pass
"""
def force_fp32_wrapper(old_func):
@functools.wraps(old_func)
def new_func(*args, **kwargs):
# check if the module has set the attribute `fp16_enabled`, if not,
# just fallback to the original method.
if not isinstance(args[0], torch.nn.Module):
raise TypeError('@force_fp32 can only be used to decorate the '
'method of nn.Module')
if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
return old_func(*args, **kwargs)
# get the arg spec of the decorated method
args_info = getfullargspec(old_func)
# get the argument names to be casted
args_to_cast = args_info.args if apply_to is None else apply_to
# convert the args that need to be processed
new_args = []
if args:
arg_names = args_info.args[:len(args)]
for i, arg_name in enumerate(arg_names):
if arg_name in args_to_cast:
new_args.append(
cast_tensor_type(args[i], torch.half, torch.float))
else:
new_args.append(args[i])
# convert the kwargs that need to be processed
new_kwargs = dict()
if kwargs:
for arg_name, arg_value in kwargs.items():
if arg_name in args_to_cast:
new_kwargs[arg_name] = cast_tensor_type(
arg_value, torch.half, torch.float)
else:
new_kwargs[arg_name] = arg_value
# apply converted arguments to the decorated method
output = old_func(*new_args, **new_kwargs)
# cast the results back to fp32 if necessary
if out_fp16:
output = cast_tensor_type(output, torch.float, torch.half)
return output
return new_func
return force_fp32_wrapper
import copy
import torch
import torch.nn as nn
from mmcv.runner import OptimizerHook
from ..utils.dist_utils import allreduce_grads
from .utils import cast_tensor_type
class Fp16OptimizerHook(OptimizerHook):
"""FP16 optimizer hook.
The steps of fp16 optimizer is as follows.
1. Scale the loss value.
2. BP in the fp16 model.
2. Copy gradients from fp16 model to fp32 weights.
3. Update fp32 weights.
4. Copy updated parameters from fp32 weights to fp16 model.
Refer to https://arxiv.org/abs/1710.03740 for more details.
Args:
loss_scale (float): Scale factor multiplied with loss.
"""
def __init__(self,
grad_clip=None,
coalesce=True,
bucket_size_mb=-1,
loss_scale=512.,
distributed=True):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
self.loss_scale = loss_scale
self.distributed = distributed
def before_run(self, runner):
# keep a copy of fp32 weights
runner.optimizer.param_groups = copy.deepcopy(
runner.optimizer.param_groups)
# convert model to fp16
wrap_fp16_model(runner.model)
def copy_grads_to_fp32(self, fp16_net, fp32_weights):
"""Copy gradients from fp16 model to fp32 weight copy."""
for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()):
if fp16_param.grad is not None:
if fp32_param.grad is None:
fp32_param.grad = fp32_param.data.new(fp32_param.size())
fp32_param.grad.copy_(fp16_param.grad)
def copy_params_to_fp16(self, fp16_net, fp32_weights):
"""Copy updated params from fp32 weight copy to fp16 model."""
for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights):
fp16_param.data.copy_(fp32_param.data)
def after_train_iter(self, runner):
# clear grads of last iteration
runner.model.zero_grad()
runner.optimizer.zero_grad()
# scale the loss value
scaled_loss = runner.outputs['loss'] * self.loss_scale
scaled_loss.backward()
# copy fp16 grads in the model to fp32 params in the optimizer
fp32_weights = []
for param_group in runner.optimizer.param_groups:
fp32_weights += param_group['params']
self.copy_grads_to_fp32(runner.model, fp32_weights)
# allreduce grads
if self.distributed:
allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb)
# scale the gradients back
for param in fp32_weights:
if param.grad is not None:
param.grad.div_(self.loss_scale)
if self.grad_clip is not None:
self.clip_grads(fp32_weights)
# update fp32 params
runner.optimizer.step()
# copy fp32 params to the fp16 model
self.copy_params_to_fp16(runner.model, fp32_weights)
def wrap_fp16_model(model):
# convert model to fp16
model.half()
# patch the normalization layers to make it work in fp32 mode
patch_norm_fp32(model)
# set `fp16_enabled` flag
for m in model.modules():
if hasattr(m, 'fp16_enabled'):
m.fp16_enabled = True
def patch_norm_fp32(module):
if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)):
module.float()
module.forward = patch_forward_method(module.forward, torch.half,
torch.float)
for child in module.children():
patch_norm_fp32(child)
return module
def patch_forward_method(func, src_type, dst_type, convert_output=True):
"""Patch the forward method of a module.
Args:
func (callable): The original forward method.
src_type (torch.dtype): Type of input arguments to be converted from.
dst_type (torch.dtype): Type of input arguments to be converted to.
convert_output (bool): Whether to convert the output back to src_type.
Returns:
callable: The patched forward method.
"""
def new_forward(*args, **kwargs):
output = func(*cast_tensor_type(args, src_type, dst_type),
**cast_tensor_type(kwargs, src_type, dst_type))
if convert_output:
output = cast_tensor_type(output, dst_type, src_type)
return output
return new_forward
from collections import abc
import numpy as np
import torch
def cast_tensor_type(inputs, src_type, dst_type):
if isinstance(inputs, torch.Tensor):
return inputs.to(dst_type)
elif isinstance(inputs, str):
return inputs
elif isinstance(inputs, np.ndarray):
return inputs
elif isinstance(inputs, abc.Mapping):
return type(inputs)({
k: cast_tensor_type(v, src_type, dst_type)
for k, v in inputs.items()
})
elif isinstance(inputs, abc.Iterable):
return type(inputs)(
cast_tensor_type(item, src_type, dst_type) for item in inputs)
else:
return inputs
from .mask_target import mask_target
from .utils import split_combined_polys
__all__ = ['split_combined_polys', 'mask_target']
import mmcv
import numpy as np
import torch
from torch.nn.modules.utils import _pair
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
cfg):
cfg_list = [cfg for _ in range(len(pos_proposals_list))]
mask_targets = map(mask_target_single, pos_proposals_list,
pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
mask_targets = torch.cat(list(mask_targets))
return mask_targets
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
mask_size = _pair(cfg.mask_size)
num_pos = pos_proposals.size(0)
mask_targets = []
if num_pos > 0:
proposals_np = pos_proposals.cpu().numpy()
_, maxh, maxw = gt_masks.shape
proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1)
proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1)
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
for i in range(num_pos):
gt_mask = gt_masks[pos_assigned_gt_inds[i]]
bbox = proposals_np[i, :].astype(np.int32)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1 + 1, 1)
h = np.maximum(y2 - y1 + 1, 1)
# mask is uint8 both before and after resizing
# mask_size (h, w) to (w, h)
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
mask_size[::-1])
mask_targets.append(target)
mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
pos_proposals.device)
else:
mask_targets = pos_proposals.new_zeros((0, ) + mask_size)
return mask_targets
import mmcv
def split_combined_polys(polys, poly_lens, polys_per_mask):
"""Split the combined 1-D polys into masks.
A mask is represented as a list of polys, and a poly is represented as
a 1-D array. In dataset, all masks are concatenated into a single 1-D
tensor. Here we need to split the tensor into original representations.
Args:
polys (list): a list (length = image num) of 1-D tensors
poly_lens (list): a list (length = image num) of poly length
polys_per_mask (list): a list (length = image num) of poly number
of each mask
Returns:
list: a list (length = image num) of list (length = mask num) of
list (length = poly num) of numpy array
"""
mask_polys_list = []
for img_id in range(len(polys)):
polys_single = polys[img_id]
polys_lens_single = poly_lens[img_id].tolist()
polys_per_mask_single = polys_per_mask[img_id].tolist()
split_polys = mmcv.slice_list(polys_single, polys_lens_single)
mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
mask_polys_list.append(mask_polys)
return mask_polys_list
from .bbox_nms import multiclass_nms
from .matrix_nms import matrix_nms
from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores)
__all__ = [
'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
'merge_aug_scores', 'merge_aug_masks', 'matrix_nms'
]
import torch
from mmdet.ops.nms import nms_wrapper
def multiclass_nms(multi_bboxes,
multi_scores,
score_thr,
nms_cfg,
max_num=-1,
score_factors=None):
"""NMS for multi-class bboxes.
Args:
multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
multi_scores (Tensor): shape (n, #class), where the 0th column
contains scores of the background class, but this will be ignored.
score_thr (float): bbox threshold, bboxes with scores lower than it
will not be considered.
nms_thr (float): NMS IoU threshold
max_num (int): if there are more than max_num bboxes after NMS,
only top max_num will be kept.
score_factors (Tensor): The factors multiplied to scores before
applying NMS
Returns:
tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
are 0-based.
"""
num_classes = multi_scores.shape[1]
bboxes, labels = [], []
nms_cfg_ = nms_cfg.copy()
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = getattr(nms_wrapper, nms_type)
for i in range(1, num_classes):
cls_inds = multi_scores[:, i] > score_thr
if not cls_inds.any():
continue
# get bboxes and scores of this class
if multi_bboxes.shape[1] == 4:
_bboxes = multi_bboxes[cls_inds, :]
else:
_bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
_scores = multi_scores[cls_inds, i]
if score_factors is not None:
_scores *= score_factors[cls_inds]
cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
i - 1,
dtype=torch.long)
bboxes.append(cls_dets)
labels.append(cls_labels)
if bboxes:
bboxes = torch.cat(bboxes)
labels = torch.cat(labels)
if bboxes.shape[0] > max_num:
_, inds = bboxes[:, -1].sort(descending=True)
inds = inds[:max_num]
bboxes = bboxes[inds]
labels = labels[inds]
else:
bboxes = multi_bboxes.new_zeros((0, 5))
labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
return bboxes, labels
import torch
def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
"""Matrix NMS for multi-class masks.
Args:
seg_masks (Tensor): shape (n, h, w)
cate_labels (Tensor): shape (n), mask labels in descending order
cate_scores (Tensor): shape (n), mask scores in descending order
kernel (str): 'linear' or 'gauss'
sigma (float): std in gaussian method
sum_masks (Tensor): The sum of seg_masks
Returns:
Tensor: cate_scores_update, tensors of shape (n)
"""
n_samples = len(cate_labels)
if n_samples == 0:
return []
if sum_masks is None:
sum_masks = seg_masks.sum((1, 2)).float()
seg_masks = seg_masks.reshape(n_samples, -1).float()
# inter.
inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0))
# union.
sum_masks_x = sum_masks.expand(n_samples, n_samples)
# iou.
iou_matrix = (inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix)).triu(diagonal=1)
# label_specific matrix.
cate_labels_x = cate_labels.expand(n_samples, n_samples)
label_matrix = (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1)
# IoU compensation
compensate_iou, _ = (iou_matrix * label_matrix).max(0)
compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0)
# IoU decay
decay_iou = iou_matrix * label_matrix
# matrix nms
if kernel == 'gaussian':
decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2))
compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2))
decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0)
elif kernel == 'linear':
decay_matrix = (1-decay_iou)/(1-compensate_iou)
decay_coefficient, _ = decay_matrix.min(0)
else:
raise NotImplementedError
# update the score.
cate_scores_update = cate_scores * decay_coefficient
return cate_scores_update
def multiclass_nms(multi_bboxes,
multi_scores,
score_thr,
nms_cfg,
max_num=-1,
score_factors=None):
"""NMS for multi-class bboxes.
Args:
multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
multi_scores (Tensor): shape (n, #class), where the 0th column
contains scores of the background class, but this will be ignored.
score_thr (float): bbox threshold, bboxes with scores lower than it
will not be considered.
nms_thr (float): NMS IoU threshold
max_num (int): if there are more than max_num bboxes after NMS,
only top max_num will be kept.
score_factors (Tensor): The factors multiplied to scores before
applying NMS
Returns:
tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
are 0-based.
"""
num_classes = multi_scores.shape[1]
bboxes, labels = [], []
nms_cfg_ = nms_cfg.copy()
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = getattr(nms_wrapper, nms_type)
for i in range(1, num_classes):
cls_inds = multi_scores[:, i] > score_thr
if not cls_inds.any():
continue
# get bboxes and scores of this class
if multi_bboxes.shape[1] == 4:
_bboxes = multi_bboxes[cls_inds, :]
else:
_bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
_scores = multi_scores[cls_inds, i]
if score_factors is not None:
_scores *= score_factors[cls_inds]
cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
i - 1,
dtype=torch.long)
bboxes.append(cls_dets)
labels.append(cls_labels)
if bboxes:
bboxes = torch.cat(bboxes)
labels = torch.cat(labels)
if bboxes.shape[0] > max_num:
_, inds = bboxes[:, -1].sort(descending=True)
inds = inds[:max_num]
bboxes = bboxes[inds]
labels = labels[inds]
else:
bboxes = multi_bboxes.new_zeros((0, 5))
labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
return bboxes, labels
import numpy as np
import torch
from mmdet.ops import nms
from ..bbox import bbox_mapping_back
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
"""Merge augmented proposals (multiscale, flip, etc.)
Args:
aug_proposals (list[Tensor]): proposals from different testing
schemes, shape (n, 5). Note that they are not rescaled to the
original image size.
img_metas (list[dict]): list of image info dict where each dict has:
'img_shape', 'scale_factor', 'flip', and my also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
rpn_test_cfg (dict): rpn test config.
Returns:
Tensor: shape (n, 4), proposals corresponding to original image scale.
"""
recovered_proposals = []
for proposals, img_info in zip(aug_proposals, img_metas):
img_shape = img_info['img_shape']
scale_factor = img_info['scale_factor']
flip = img_info['flip']
_proposals = proposals.clone()
_proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
scale_factor, flip)
recovered_proposals.append(_proposals)
aug_proposals = torch.cat(recovered_proposals, dim=0)
merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
scores = merged_proposals[:, 4]
_, order = scores.sort(0, descending=True)
num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
order = order[:num]
merged_proposals = merged_proposals[order, :]
return merged_proposals
def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
"""Merge augmented detection bboxes and scores.
Args:
aug_bboxes (list[Tensor]): shape (n, 4*#class)
aug_scores (list[Tensor] or None): shape (n, #class)
img_shapes (list[Tensor]): shape (3, ).
rcnn_test_cfg (dict): rcnn test config.
Returns:
tuple: (bboxes, scores)
"""
recovered_bboxes = []
for bboxes, img_info in zip(aug_bboxes, img_metas):
img_shape = img_info[0]['img_shape']
scale_factor = img_info[0]['scale_factor']
flip = img_info[0]['flip']
bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
recovered_bboxes.append(bboxes)
bboxes = torch.stack(recovered_bboxes).mean(dim=0)
if aug_scores is None:
return bboxes
else:
scores = torch.stack(aug_scores).mean(dim=0)
return bboxes, scores
def merge_aug_scores(aug_scores):
"""Merge augmented bbox scores."""
if isinstance(aug_scores[0], torch.Tensor):
return torch.mean(torch.stack(aug_scores), dim=0)
else:
return np.mean(aug_scores, axis=0)
def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
"""Merge augmented mask prediction.
Args:
aug_masks (list[ndarray]): shape (n, #class, h, w)
img_shapes (list[ndarray]): shape (3, ).
rcnn_test_cfg (dict): rcnn test config.
Returns:
tuple: (bboxes, scores)
"""
recovered_masks = [
mask if not img_info[0]['flip'] else mask[..., ::-1]
for mask, img_info in zip(aug_masks, img_metas)
]
if weights is None:
merged_masks = np.mean(recovered_masks, axis=0)
else:
merged_masks = np.average(
np.array(recovered_masks), axis=0, weights=np.array(weights))
return merged_masks
from .dist_utils import DistOptimizerHook, allreduce_grads
from .misc import multi_apply, tensor2imgs, unmap
__all__ = [
'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
'multi_apply'
]
from collections import OrderedDict
import torch.distributed as dist
from mmcv.runner import OptimizerHook
from torch._utils import (_flatten_dense_tensors, _take_tensors,
_unflatten_dense_tensors)
def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
if bucket_size_mb > 0:
bucket_size_bytes = bucket_size_mb * 1024 * 1024
buckets = _take_tensors(tensors, bucket_size_bytes)
else:
buckets = OrderedDict()
for tensor in tensors:
tp = tensor.type()
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(tensor)
buckets = buckets.values()
for bucket in buckets:
flat_tensors = _flatten_dense_tensors(bucket)
dist.all_reduce(flat_tensors)
flat_tensors.div_(world_size)
for tensor, synced in zip(
bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
tensor.copy_(synced)
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
grads = [
param.grad.data for param in params
if param.requires_grad and param.grad is not None
]
world_size = dist.get_world_size()
if coalesce:
_allreduce_coalesced(grads, world_size, bucket_size_mb)
else:
for tensor in grads:
dist.all_reduce(tensor.div_(world_size))
class DistOptimizerHook(OptimizerHook):
def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
def after_train_iter(self, runner):
runner.optimizer.zero_grad()
runner.outputs['loss'].backward()
allreduce_grads(runner.model.parameters(), self.coalesce,
self.bucket_size_mb)
if self.grad_clip is not None:
self.clip_grads(runner.model.parameters())
runner.optimizer.step()
from functools import partial
import mmcv
import numpy as np
from six.moves import map, zip
def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
num_imgs = tensor.size(0)
mean = np.array(mean, dtype=np.float32)
std = np.array(std, dtype=np.float32)
imgs = []
for img_id in range(num_imgs):
img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
img = mmcv.imdenormalize(
img, mean, std, to_bgr=to_rgb).astype(np.uint8)
imgs.append(np.ascontiguousarray(img))
return imgs
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
from .builder import build_dataset
from .cityscapes import CityscapesDataset
from .coco import CocoDataset
from .custom import CustomDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .registry import DATASETS
from .voc import VOCDataset
from .wider_face import WIDERFaceDataset
from .xml_style import XMLDataset
__all__ = [
'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset',
'DATASETS', 'build_dataset'
]
import copy
from mmdet.utils import build_from_cfg
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .registry import DATASETS
def _concat_dataset(cfg, default_args=None):
ann_files = cfg['ann_file']
img_prefixes = cfg.get('img_prefix', None)
seg_prefixes = cfg.get('seg_prefix', None)
proposal_files = cfg.get('proposal_file', None)
datasets = []
num_dset = len(ann_files)
for i in range(num_dset):
data_cfg = copy.deepcopy(cfg)
data_cfg['ann_file'] = ann_files[i]
if isinstance(img_prefixes, (list, tuple)):
data_cfg['img_prefix'] = img_prefixes[i]
if isinstance(seg_prefixes, (list, tuple)):
data_cfg['seg_prefix'] = seg_prefixes[i]
if isinstance(proposal_files, (list, tuple)):
data_cfg['proposal_file'] = proposal_files[i]
datasets.append(build_dataset(data_cfg, default_args))
return ConcatDataset(datasets)
def build_dataset(cfg, default_args=None):
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
elif cfg['type'] == 'RepeatDataset':
dataset = RepeatDataset(
build_dataset(cfg['dataset'], default_args), cfg['times'])
elif isinstance(cfg['ann_file'], (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)
return dataset
from .coco import CocoDataset
from .registry import DATASETS
@DATASETS.register_module
class CityscapesDataset(CocoDataset):
CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
'bicycle')
import numpy as np
from pycocotools.coco import COCO
from .custom import CustomDataset
from .registry import DATASETS
@DATASETS.register_module
class CocoDataset(CustomDataset):
CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')
def load_annotations(self, ann_file):
self.coco = COCO(ann_file)
self.cat_ids = self.coco.getCatIds()
self.cat2label = {
cat_id: i + 1
for i, cat_id in enumerate(self.cat_ids)
}
self.img_ids = self.coco.getImgIds()
img_infos = []
for i in self.img_ids:
info = self.coco.loadImgs([i])[0]
info['filename'] = info['file_name']
img_infos.append(info)
return img_infos
def get_ann_info(self, idx):
img_id = self.img_infos[idx]['id']
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
ann_info = self.coco.loadAnns(ann_ids)
return self._parse_ann_info(self.img_infos[idx], ann_info)
def _filter_imgs(self, min_size=32):
"""Filter images too small or without ground truths."""
valid_inds = []
ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
for i, img_info in enumerate(self.img_infos):
if self.filter_empty_gt and self.img_ids[i] not in ids_with_ann:
continue
if min(img_info['width'], img_info['height']) >= min_size:
valid_inds.append(i)
return valid_inds
def _parse_ann_info(self, img_info, ann_info):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, seg_map. "masks" are raw annotations and not
decoded into binary masks.
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
gt_masks_ann = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann.get('iscrowd', False):
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
gt_masks_ann.append(ann['segmentation'])
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
seg_map = img_info['filename'].replace('jpg', 'png')
ann = dict(
bboxes=gt_bboxes,
labels=gt_labels,
bboxes_ignore=gt_bboxes_ignore,
masks=gt_masks_ann,
seg_map=seg_map)
return ann
import os.path as osp
import mmcv
import numpy as np
from torch.utils.data import Dataset
from .pipelines import Compose
from .registry import DATASETS
@DATASETS.register_module
class CustomDataset(Dataset):
"""Custom dataset for detection.
Annotation format:
[
{
'filename': 'a.jpg',
'width': 1280,
'height': 720,
'ann': {
'bboxes': <np.ndarray> (n, 4),
'labels': <np.ndarray> (n, ),
'bboxes_ignore': <np.ndarray> (k, 4), (optional field)
'labels_ignore': <np.ndarray> (k, 4) (optional field)
}
},
...
]
The `ann` field is optional for testing.
"""
CLASSES = None
def __init__(self,
ann_file,
pipeline,
data_root=None,
img_prefix='',
seg_prefix=None,
proposal_file=None,
test_mode=False,
filter_empty_gt=True):
self.ann_file = ann_file
self.data_root = data_root
self.img_prefix = img_prefix
self.seg_prefix = seg_prefix
self.proposal_file = proposal_file
self.test_mode = test_mode
self.filter_empty_gt = filter_empty_gt
# join paths if data_root is specified
if self.data_root is not None:
if not osp.isabs(self.ann_file):
self.ann_file = osp.join(self.data_root, self.ann_file)
if not (self.img_prefix is None or osp.isabs(self.img_prefix)):
self.img_prefix = osp.join(self.data_root, self.img_prefix)
if not (self.seg_prefix is None or osp.isabs(self.seg_prefix)):
self.seg_prefix = osp.join(self.data_root, self.seg_prefix)
if not (self.proposal_file is None
or osp.isabs(self.proposal_file)):
self.proposal_file = osp.join(self.data_root,
self.proposal_file)
# load annotations (and proposals)
self.img_infos = self.load_annotations(self.ann_file)
if self.proposal_file is not None:
self.proposals = self.load_proposals(self.proposal_file)
else:
self.proposals = None
# filter images too small
if not test_mode:
valid_inds = self._filter_imgs()
self.img_infos = [self.img_infos[i] for i in valid_inds]
if self.proposals is not None:
self.proposals = [self.proposals[i] for i in valid_inds]
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
self.pipeline = Compose(pipeline)
def __len__(self):
return len(self.img_infos)
def load_annotations(self, ann_file):
return mmcv.load(ann_file)
def load_proposals(self, proposal_file):
return mmcv.load(proposal_file)
def get_ann_info(self, idx):
return self.img_infos[idx]['ann']
def pre_pipeline(self, results):
results['img_prefix'] = self.img_prefix
results['seg_prefix'] = self.seg_prefix
results['proposal_file'] = self.proposal_file
results['bbox_fields'] = []
results['mask_fields'] = []
results['seg_fields'] = []
def _filter_imgs(self, min_size=32):
"""Filter images too small."""
valid_inds = []
for i, img_info in enumerate(self.img_infos):
if min(img_info['width'], img_info['height']) >= min_size:
valid_inds.append(i)
return valid_inds
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
for i in range(len(self)):
img_info = self.img_infos[i]
if img_info['width'] / img_info['height'] > 1:
self.flag[i] = 1
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_img(idx)
while True:
data = self.prepare_train_img(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_img(self, idx):
img_info = self.img_infos[idx]
ann_info = self.get_ann_info(idx)
results = dict(img_info=img_info, ann_info=ann_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def prepare_test_img(self, idx):
img_info = self.img_infos[idx]
results = dict(img_info=img_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
import numpy as np
from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
from .registry import DATASETS
@DATASETS.register_module
class ConcatDataset(_ConcatDataset):
"""A wrapper of concatenated dataset.
Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
concat the group flag for image aspect ratio.
Args:
datasets (list[:obj:`Dataset`]): A list of datasets.
"""
def __init__(self, datasets):
super(ConcatDataset, self).__init__(datasets)
self.CLASSES = datasets[0].CLASSES
if hasattr(datasets[0], 'flag'):
flags = []
for i in range(0, len(datasets)):
flags.append(datasets[i].flag)
self.flag = np.concatenate(flags)
@DATASETS.register_module
class RepeatDataset(object):
"""A wrapper of repeated dataset.
The length of repeated dataset will be `times` larger than the original
dataset. This is useful when the data loading time is long but the dataset
is small. Using RepeatDataset can reduce the data loading time between
epochs.
Args:
dataset (:obj:`Dataset`): The dataset to be repeated.
times (int): Repeat times.
"""
def __init__(self, dataset, times):
self.dataset = dataset
self.times = times
self.CLASSES = dataset.CLASSES
if hasattr(self.dataset, 'flag'):
self.flag = np.tile(self.dataset.flag, times)
self._ori_len = len(self.dataset)
def __getitem__(self, idx):
return self.dataset[idx % self._ori_len]
def __len__(self):
return self.times * self._ori_len
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment