Commit 85529f35 authored by unknown's avatar unknown
Browse files

添加openmmlab测试用例

parent b21b0c01
from .accuracy import Accuracy, accuracy
from .asymmetric_loss import AsymmetricLoss, asymmetric_loss
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
cross_entropy)
from .focal_loss import FocalLoss, sigmoid_focal_loss
from .label_smooth_loss import LabelSmoothLoss
from .utils import (convert_to_one_hot, reduce_loss, weight_reduce_loss,
weighted_loss)
__all__ = [
'accuracy', 'Accuracy', 'asymmetric_loss', 'AsymmetricLoss',
'cross_entropy', 'binary_cross_entropy', 'CrossEntropyLoss', 'reduce_loss',
'weight_reduce_loss', 'LabelSmoothLoss', 'weighted_loss', 'FocalLoss',
'sigmoid_focal_loss', 'convert_to_one_hot'
]
import numpy as np
import torch
import torch.nn as nn
def accuracy_numpy(pred, target, topk=1, thrs=None):
    """Compute top-k accuracy (in percent) for NumPy predictions.

    Args:
        pred (np.ndarray): Prediction scores, indexed as (sample, class).
        target (np.ndarray): Ground-truth class index of every sample.
        topk (int | tuple[int]): The ``k`` values to evaluate. A single int
            is treated as a one-element tuple. Defaults to 1.
        thrs (float | tuple[float], optional): Score threshold(s). A hit is
            only counted when its score is strictly larger than the
            threshold. ``None`` is treated as ``0.0``. Defaults to None.

    Returns:
        list: One entry per ``k`` in ``topk``. Each entry is a single
        accuracy when ``thrs`` is a float (or None), otherwise a list with
        one accuracy per threshold.

    Raises:
        TypeError: If ``thrs`` is neither None, a float nor a tuple.
    """
    if isinstance(topk, int):
        # The declared default (``topk=1``) is a bare int, which would make
        # ``max(topk)`` and the ``for k in topk`` loop below fail.
        topk = (topk, )

    if thrs is None:
        thrs = 0.0
    if isinstance(thrs, float):
        thrs = (thrs, )
        res_single = True
    elif isinstance(thrs, tuple):
        res_single = False
    else:
        raise TypeError(
            f'thrs should be float or tuple, but got {type(thrs)}.')

    res = []
    maxk = max(topk)
    num = pred.shape[0]
    # Take the maxk best classes per sample, best first. Labels and scores
    # are sorted independently but in the same order.
    pred_label = pred.argsort(axis=1)[:, -maxk:][:, ::-1]
    pred_score = np.sort(pred, axis=1)[:, -maxk:][:, ::-1]

    for k in topk:
        correct_k = pred_label[:, :k] == target.reshape(-1, 1)
        res_thr = []
        for thr in thrs:
            # Only prediction values larger than thr are counted as correct
            _correct_k = correct_k & (pred_score[:, :k] > thr)
            # A sample is a hit if any of its top-k slots is correct.
            _correct_k = np.logical_or.reduce(_correct_k, axis=1)
            res_thr.append(_correct_k.sum() * 100. / num)
        if res_single:
            res.append(res_thr[0])
        else:
            res.append(res_thr)
    return res
def accuracy_torch(pred, target, topk=1, thrs=None):
    """Compute top-k accuracy (in percent) for torch predictions.

    Args:
        pred (torch.Tensor): Prediction scores, indexed as (sample, class).
        target (torch.Tensor): Ground-truth class index of every sample.
        topk (int | tuple[int]): The ``k`` values to evaluate. A single int
            is treated as a one-element tuple. Defaults to 1.
        thrs (float | tuple[float], optional): Score threshold(s). A hit is
            only counted when its score is strictly larger than the
            threshold. ``None`` is treated as ``0.0``. Defaults to None.

    Returns:
        list: One entry per ``k`` in ``topk``. Each entry is a one-element
        tensor when ``thrs`` is a float (or None), otherwise a list with
        one such tensor per threshold.

    Raises:
        TypeError: If ``thrs`` is neither None, a float nor a tuple.
    """
    if isinstance(topk, int):
        # The declared default (``topk=1``) is a bare int, which would make
        # ``max(topk)`` and the ``for k in topk`` loop below fail.
        topk = (topk, )

    if thrs is None:
        thrs = 0.0
    if isinstance(thrs, float):
        thrs = (thrs, )
        res_single = True
    elif isinstance(thrs, tuple):
        res_single = False
    else:
        raise TypeError(
            f'thrs should be float or tuple, but got {type(thrs)}.')

    res = []
    maxk = max(topk)
    num = pred.size(0)
    pred_score, pred_label = pred.topk(maxk, dim=1)
    # Transpose so that row i holds the i-th best guess of every sample.
    pred_label = pred_label.t()
    pred_score = pred_score.t()
    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
    for k in topk:
        res_thr = []
        for thr in thrs:
            # Only prediction values larger than thr are counted as correct
            _correct = correct & (pred_score > thr)
            correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res_thr.append(correct_k.mul_(100. / num))
        if res_single:
            res.append(res_thr[0])
        else:
            res.append(res_thr)
    return res
def accuracy(pred, target, topk=1, thrs=None):
    """Calculate accuracy according to the prediction and target.

    Dispatches to the torch or NumPy implementation depending on the type
    of the inputs.

    Args:
        pred (torch.Tensor | np.ndarray): The model prediction.
        target (torch.Tensor | np.ndarray): The target of each prediction.
        topk (int | tuple[int]): If the target is among the ``topk``
            predictions, the prediction is regarded as correct.
            Defaults to 1.
        thrs (float | tuple[float], optional): Predictions with scores under
            the thresholds are considered negative. Defaults to None.

    Returns:
        The result for the single ``k`` when ``topk`` is an int, otherwise a
        list with one result per entry of ``topk``. Each result is a single
        value or a per-threshold list, depending on whether ``thrs`` is a
        single float.

    Raises:
        TypeError: If ``pred`` and ``target`` are not both torch tensors or
            both NumPy arrays.
    """
    assert isinstance(topk, (int, tuple))
    return_single = isinstance(topk, int)
    if return_single:
        topk = (topk, )

    both_tensors = (
        isinstance(pred, torch.Tensor) and isinstance(target, torch.Tensor))
    both_arrays = (
        isinstance(pred, np.ndarray) and isinstance(target, np.ndarray))

    if both_tensors:
        res = accuracy_torch(pred, target, topk, thrs)
    elif both_arrays:
        res = accuracy_numpy(pred, target, topk, thrs)
    else:
        raise TypeError(
            f'pred and target should both be torch.Tensor or np.ndarray, '
            f'but got {type(pred)} and {type(target)}.')

    if return_single:
        return res[0]
    return res
class Accuracy(nn.Module):
    """``nn.Module`` wrapper around :func:`accuracy`."""

    def __init__(self, topk=(1, )):
        """Store the top-k criterion.

        Args:
            topk (tuple): The ``k`` values used to calculate the accuracy.
                Defaults to (1,).
        """
        super().__init__()
        self.topk = topk

    def forward(self, pred, target):
        """Calculate the accuracy of ``pred`` against ``target``.

        Args:
            pred (torch.Tensor): Prediction of models.
            target (torch.Tensor): Target for each prediction.

        Returns:
            The value returned by ``accuracy`` for the stored ``topk``.
        """
        return accuracy(pred, target, topk=self.topk)
import torch
import torch.nn as nn
from ..builder import LOSSES
from .utils import weight_reduce_loss
def asymmetric_loss(pred,
                    target,
                    weight=None,
                    gamma_pos=1.0,
                    gamma_neg=4.0,
                    clip=0.05,
                    reduction='mean',
                    avg_factor=None):
    """Asymmetric loss computed on logits.

    Please refer to the `paper <https://arxiv.org/abs/2009.14119>`_ for
    details.

    Args:
        pred (torch.Tensor): The prediction with shape (N, *).
        target (torch.Tensor): The ground truth label of the prediction with
            shape (N, *).
        weight (torch.Tensor, optional): Sample-wise loss weight with shape
            (N, ). Defaults to None.
        gamma_pos (float): Positive focusing parameter. Defaults to 1.0.
        gamma_neg (float): Negative focusing parameter. We usually set
            gamma_neg > gamma_pos. Defaults to 4.0.
        clip (float, optional): Probability margin added to the negative
            probability (capped at 1). Defaults to 0.05.
        reduction (str): The method used to reduce the loss.
            Options are "none", "mean" and "sum". If reduction is 'none',
            loss is same shape as pred and label. Defaults to 'mean'.
        avg_factor (int, optional): Average factor that is used to average
            the loss. Defaults to None.

    Returns:
        torch.Tensor: Loss.
    """
    assert pred.shape == \
        target.shape, 'pred and target should be in the same shape.'

    eps = 1e-8
    prob_pos = pred.sigmoid()
    target = target.type_as(pred)

    # Probability assigned to the negative class, optionally shifted by the
    # margin and capped at 1.
    prob_neg = 1 - prob_pos
    if clip and clip > 0:
        prob_neg = (prob_neg + clip).clamp(max=1)
    pt = prob_neg * (1 - target) + prob_pos * target

    # Per-element focusing exponent: gamma_pos where target is 1, gamma_neg
    # where it is 0.
    focusing = gamma_pos * target + gamma_neg * (1 - target)
    asymmetric_weight = (1 - pt).pow(focusing)
    loss = -torch.log(pt.clamp(min=eps)) * asymmetric_weight

    if weight is not None:
        assert weight.dim() == 1
        weight = weight.float()
        if pred.dim() > 1:
            # Make the per-sample weight broadcast over the other dims.
            weight = weight.reshape(-1, 1)
    return weight_reduce_loss(loss, weight, reduction, avg_factor)
@LOSSES.register_module()
class AsymmetricLoss(nn.Module):
    """Module form of :func:`asymmetric_loss`.

    Args:
        gamma_pos (float): Positive focusing parameter.
            Defaults to 0.0.
        gamma_neg (float): Negative focusing parameter. We
            usually set gamma_neg > gamma_pos. Defaults to 4.0.
        clip (float, optional): Probability margin. Defaults to 0.05.
        reduction (str): The method used to reduce the loss into
            a scalar. Defaults to 'mean'.
        loss_weight (float): Weight of loss. Defaults to 1.0.
    """

    def __init__(self,
                 gamma_pos=0.0,
                 gamma_neg=4.0,
                 clip=0.05,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.clip = clip
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Compute the weighted asymmetric loss.

        ``reduction_override``, when given, takes precedence over the
        reduction configured at construction time.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        raw_loss = asymmetric_loss(
            pred,
            target,
            weight,
            gamma_pos=self.gamma_pos,
            gamma_neg=self.gamma_neg,
            clip=self.clip,
            reduction=reduction,
            avg_factor=avg_factor)
        return self.loss_weight * raw_loss
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None):
    """Calculate the CrossEntropy loss.

    Args:
        pred (torch.Tensor): The prediction with shape (N, C), C is the number
            of classes.
        label (torch.Tensor): The gt label of the prediction.
        weight (torch.Tensor, optional): Sample-wise loss weight.
        reduction (str): The method used to reduce the loss.
        avg_factor (int, optional): Average factor that is used to average
            the loss. Defaults to None.

    Returns:
        torch.Tensor: The calculated loss
    """
    # unreduced loss first; weighting and reduction happen in one place
    per_sample = F.cross_entropy(pred, label, reduction='none')
    sample_weight = None if weight is None else weight.float()
    return weight_reduce_loss(
        per_sample,
        weight=sample_weight,
        reduction=reduction,
        avg_factor=avg_factor)
def soft_cross_entropy(pred,
                       label,
                       weight=None,
                       reduction='mean',
                       avg_factor=None):
    """Calculate the Soft CrossEntropy loss. The label can be float.

    Args:
        pred (torch.Tensor): The prediction with shape (N, C), C is the number
            of classes.
        label (torch.Tensor): The gt label of the prediction with shape (N, C).
            When using "mixup", the label can be float.
        weight (torch.Tensor, optional): Sample-wise loss weight.
        reduction (str): The method used to reduce the loss.
        avg_factor (int, optional): Average factor that is used to average
            the loss. Defaults to None.

    Returns:
        torch.Tensor: The calculated loss
    """
    # -sum_c label_c * log_softmax(pred)_c, summed over the last dimension
    log_prob = F.log_softmax(pred, dim=-1)
    per_sample = (-label * log_prob).sum(dim=-1)

    sample_weight = None if weight is None else weight.float()
    return weight_reduce_loss(
        per_sample,
        weight=sample_weight,
        reduction=reduction,
        avg_factor=avg_factor)
def binary_cross_entropy(pred,
                         label,
                         weight=None,
                         reduction='mean',
                         avg_factor=None):
    """Calculate the binary CrossEntropy loss with logits.

    Args:
        pred (torch.Tensor): The prediction with shape (N, *).
        label (torch.Tensor): The gt label with shape (N, *).
        weight (torch.Tensor, optional): Loss weight with shape (N, ); it
            is reshaped to (N, 1) when ``pred`` has more than one
            dimension. Defaults to None.
        reduction (str): The method used to reduce the loss.
            Options are "none", "mean" and "sum". If reduction is 'none',
            loss is same shape as pred and label. Defaults to 'mean'.
        avg_factor (int, optional): Average factor that is used to average
            the loss. Defaults to None.

    Returns:
        torch.Tensor: The calculated loss
    """
    assert pred.dim() == label.dim()

    elementwise = F.binary_cross_entropy_with_logits(
        pred, label, reduction='none')

    if weight is not None:
        assert weight.dim() == 1
        weight = weight.float()
        if pred.dim() > 1:
            # column vector so the weight broadcasts across each row
            weight = weight.reshape(-1, 1)

    return weight_reduce_loss(
        elementwise,
        weight=weight,
        reduction=reduction,
        avg_factor=avg_factor)
@LOSSES.register_module()
class CrossEntropyLoss(nn.Module):
    """Cross entropy loss.

    Args:
        use_sigmoid (bool): Whether to use the sigmoid (binary cross
            entropy) criterion instead of the softmax one.
            Defaults to False.
        use_soft (bool): Whether to use the soft version of CrossEntropyLoss.
            Cannot be combined with ``use_sigmoid``. Defaults to False.
        reduction (str): The method used to reduce the loss.
            Options are "none", "mean" and "sum". Defaults to 'mean'.
        loss_weight (float): Weight of the loss. Defaults to 1.0.
    """

    def __init__(self,
                 use_sigmoid=False,
                 use_soft=False,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.use_sigmoid = use_sigmoid
        self.use_soft = use_soft
        assert not (
            self.use_soft and self.use_sigmoid
        ), 'use_sigmoid and use_soft could not be set simultaneously'

        self.reduction = reduction
        self.loss_weight = loss_weight

        # Pick the functional criterion once, at construction time.
        if self.use_sigmoid:
            criterion = binary_cross_entropy
        elif self.use_soft:
            criterion = soft_cross_entropy
        else:
            criterion = cross_entropy
        self.cls_criterion = criterion

    def forward(self,
                cls_score,
                label,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Compute the weighted loss with the selected criterion.

        ``reduction_override``, when given, takes precedence over the
        reduction configured at construction time. Extra keyword arguments
        are forwarded to the criterion.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        raw_loss = self.cls_criterion(
            cls_score,
            label,
            weight,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        return self.loss_weight * raw_loss
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def sigmoid_focal_loss(pred,
                       target,
                       weight=None,
                       gamma=2.0,
                       alpha=0.25,
                       reduction='mean',
                       avg_factor=None):
    """Sigmoid focal loss.

    Args:
        pred (torch.Tensor): The prediction with shape (N, *).
        target (torch.Tensor): The ground truth label of the prediction with
            shape (N, *).
        weight (torch.Tensor, optional): Sample-wise loss weight with shape
            (N, ). Defaults to None.
        gamma (float): The gamma for calculating the modulating factor.
            Defaults to 2.0.
        alpha (float): A balanced form for Focal Loss. Defaults to 0.25.
        reduction (str): The method used to reduce the loss.
            Options are "none", "mean" and "sum". If reduction is 'none',
            loss is same shape as pred and label. Defaults to 'mean'.
        avg_factor (int, optional): Average factor that is used to average
            the loss. Defaults to None.

    Returns:
        torch.Tensor: Loss.
    """
    assert pred.shape == \
        target.shape, 'pred and target should be in the same shape.'

    prob = pred.sigmoid()
    target = target.type_as(pred)

    # pt is the probability mass put on the wrong side of each label.
    pt = (1 - prob) * target + prob * (1 - target)
    alpha_factor = alpha * target + (1 - alpha) * (1 - target)
    focal_weight = alpha_factor * pt.pow(gamma)

    bce = F.binary_cross_entropy_with_logits(pred, target, reduction='none')
    loss = bce * focal_weight

    if weight is not None:
        assert weight.dim() == 1
        weight = weight.float()
        if pred.dim() > 1:
            # Make the per-sample weight broadcast over the other dims.
            weight = weight.reshape(-1, 1)
    return weight_reduce_loss(loss, weight, reduction, avg_factor)
@LOSSES.register_module()
class FocalLoss(nn.Module):
    """Module form of :func:`sigmoid_focal_loss`.

    Args:
        gamma (float): Focusing parameter in focal loss.
            Defaults to 2.0.
        alpha (float): The parameter in balanced form of focal
            loss. Defaults to 0.25.
        reduction (str): The method used to reduce the loss into
            a scalar. Defaults to 'mean'.
        loss_weight (float): Weight of loss. Defaults to 1.0.
    """

    def __init__(self,
                 gamma=2.0,
                 alpha=0.25,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Compute the weighted sigmoid focal loss.

        Args:
            pred (torch.Tensor): The prediction with shape (N, *).
            target (torch.Tensor): The ground truth label of the prediction
                with shape (N, *).
            weight (torch.Tensor, optional): Sample-wise loss weight with
                shape (N, ). Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The method used to reduce the
                loss into a scalar. Options are "none", "mean" and "sum".
                Takes precedence over the configured reduction when given.
                Defaults to None.

        Returns:
            torch.Tensor: Loss.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        raw_loss = sigmoid_focal_loss(
            pred,
            target,
            weight,
            gamma=self.gamma,
            alpha=self.alpha,
            reduction=reduction,
            avg_factor=avg_factor)
        return self.loss_weight * raw_loss
import warnings
import torch
import torch.nn as nn
from ..builder import LOSSES
from .cross_entropy_loss import CrossEntropyLoss
from .utils import convert_to_one_hot
@LOSSES.register_module()
class LabelSmoothLoss(nn.Module):
    r"""Label-smoothed cross entropy loss.

    Refers to `Rethinking the Inception Architecture for Computer Vision` -
    https://arxiv.org/abs/1512.00567

    This decreases gap between output scores and encourages generalization.
    Labels provided to forward can be one-hot like vectors (NxC) or class
    indices (Nx1).
    And this accepts linear combination of one-hot like labels from mixup or
    cutmix except multi-label task.

    Args:
        label_smooth_val (float): The degree of label smoothing, in [0, 1).
        num_classes (int, optional): Number of classes. When None it is
            taken from ``cls_score.shape[1]`` on the first forward call.
            Defaults to None.
        mode (str, optional): Refers to notes, Options are 'original',
            'classy_vision', 'multi_label'. When None, a warning is emitted
            and 'classy_vision' is used. Defaults to None.
        reduction (str): The method used to reduce the loss.
            Options are "none", "mean" and "sum". Defaults to 'mean'.
        loss_weight (float): Weight of the loss. Defaults to 1.0.

    Notes:
        if the mode is "original", this will use the same label smooth method
        as the original paper as:

        .. math::
            (1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K}

        where epsilon is the `label_smooth_val`, K is the num_classes and
        delta(k,y) is Dirac delta, which equals 1 for k=y and 0 otherwise.

        if the mode is "classy_vision", this will use the same label smooth
        method as the facebookresearch/ClassyVision repo as:

        .. math::
            \frac{\delta_{k, y} + \epsilon/K}{1+\epsilon}

        if the mode is "multi_label", this will accept labels from multi-label
        task and smoothing them as:

        .. math::
            (1-2\epsilon)\delta_{k, y} + \epsilon
    """

    def __init__(self,
                 label_smooth_val,
                 num_classes=None,
                 mode=None,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.num_classes = num_classes
        self.loss_weight = loss_weight

        assert (isinstance(label_smooth_val, float)
                and 0 <= label_smooth_val < 1), \
            f'LabelSmoothLoss accepts a float label_smooth_val ' \
            f'over [0, 1), but gets {label_smooth_val}'
        self.label_smooth_val = label_smooth_val

        accept_reduction = {'none', 'mean', 'sum'}
        # Report the offending ``reduction`` value (the message used to
        # print ``mode`` by mistake).
        assert reduction in accept_reduction, \
            f'LabelSmoothLoss supports reduction {accept_reduction}, ' \
            f'but gets {reduction}.'
        self.reduction = reduction

        if mode is None:
            warnings.warn(
                'LabelSmoothLoss mode is not set, use "classy_vision" '
                'by default. The default value will be changed to '
                '"original" recently. Please set mode manually if want '
                'to keep "classy_vision".', UserWarning)
            mode = 'classy_vision'

        accept_mode = {'original', 'classy_vision', 'multi_label'}
        assert mode in accept_mode, \
            f'LabelSmoothLoss supports mode {accept_mode}, but gets {mode}.'
        self.mode = mode

        self._eps = label_smooth_val
        if mode == 'classy_vision':
            # Rescale so the 'original' formula yields the ClassyVision one.
            self._eps = label_smooth_val / (1 + label_smooth_val)

        # The configured reduction is handed to the inner criterion so that
        # it is actually applied; loss_weight is applied in ``forward``.
        if mode == 'multi_label':
            self.ce = CrossEntropyLoss(use_sigmoid=True, reduction=reduction)
            self.smooth_label = self.multilabel_smooth_label
        else:
            self.ce = CrossEntropyLoss(use_soft=True, reduction=reduction)
            self.smooth_label = self.original_smooth_label

    def generate_one_hot_like_label(self, label):
        """This function takes one-hot or index label vectors and computes one-
        hot like label vectors (float)"""
        # check if targets are inputted as class integers
        if label.dim() == 1 or (label.dim() == 2 and label.shape[1] == 1):
            label = convert_to_one_hot(label.view(-1, 1), self.num_classes)
        return label.float()

    def original_smooth_label(self, one_hot_like_label):
        """Smooth labels as ``(1 - eps) * label + eps / num_classes``."""
        assert self.num_classes > 0
        smooth_label = one_hot_like_label * (1 - self._eps)
        smooth_label += self._eps / self.num_classes
        return smooth_label

    def multilabel_smooth_label(self, one_hot_like_label):
        """Set positive entries to ``1 - eps`` and all others to ``eps``."""
        assert self.num_classes > 0
        smooth_label = torch.full_like(one_hot_like_label, self._eps)
        smooth_label.masked_fill_(one_hot_like_label > 0, 1 - self._eps)
        return smooth_label

    def forward(self,
                cls_score,
                label,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Compute the label-smoothed loss.

        Args:
            cls_score (torch.Tensor): Class scores; ``shape[1]`` must equal
                the number of classes.
            label (torch.Tensor): Class indices or one-hot like labels.
            weight (torch.Tensor, optional): Forwarded to the inner
                criterion. Defaults to None.
            avg_factor (int, optional): Forwarded to the inner criterion.
                Defaults to None.
            reduction_override (str, optional): Forwarded to the inner
                criterion, where it takes precedence over the configured
                reduction. Defaults to None.

        Returns:
            torch.Tensor: The loss multiplied by ``loss_weight``.
        """
        if self.num_classes is not None:
            assert self.num_classes == cls_score.shape[1], \
                f'num_classes should equal to cls_score.shape[1], ' \
                f'but got num_classes: {self.num_classes} and ' \
                f'cls_score.shape[1]: {cls_score.shape[1]}'
        else:
            # Infer the class count lazily from the first batch.
            self.num_classes = cls_score.shape[1]

        one_hot_like_label = self.generate_one_hot_like_label(label=label)
        assert one_hot_like_label.shape == cls_score.shape, \
            f'LabelSmoothLoss requires output and target ' \
            f'to be same shape, but got output.shape: {cls_score.shape} ' \
            f'and target.shape: {one_hot_like_label.shape}'

        smoothed_label = self.smooth_label(one_hot_like_label)
        return self.loss_weight * self.ce.forward(
            cls_score,
            smoothed_label,
            weight=weight,
            avg_factor=avg_factor,
            reduction_override=reduction_override,
            **kwargs)
import functools
import torch
import torch.nn.functional as F
def reduce_loss(loss, reduction):
    """Reduce loss as specified.

    Args:
        loss (Tensor): Elementwise loss tensor.
        reduction (str): Options are "none", "mean" and "sum".

    Return:
        Tensor: Reduced loss tensor.
    """
    # get_enum maps: none -> 0, elementwise_mean -> 1, sum -> 2
    code = F._Reduction.get_enum(reduction)
    if code == 1:
        return loss.mean()
    if code == 2:
        return loss.sum()
    if code == 0:
        return loss
    return None
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
    """Apply element-wise weight and reduce loss.

    Args:
        loss (Tensor): Element-wise loss.
        weight (Tensor): Element-wise weights.
        reduction (str): Same as built-in losses of PyTorch.
        avg_factor (float): Average factor when computing the mean of losses.

    Returns:
        Tensor: Processed loss values.

    Raises:
        ValueError: If ``avg_factor`` is given together with a reduction
            other than 'mean' or 'none'.
    """
    if weight is not None:
        loss = loss * weight

    # Without an explicit averaging factor this is a plain reduction.
    if avg_factor is None:
        return reduce_loss(loss, reduction)

    # With avg_factor, 'mean' divides the total by that factor ...
    if reduction == 'mean':
        return loss.sum() / avg_factor
    # ... 'none' leaves the loss untouched, anything else is rejected.
    if reduction != 'none':
        raise ValueError('avg_factor can not be used with reduction="sum"')
    return loss
def weighted_loss(loss_func):
    """Create a weighted version of a given loss function.

    The decorated function must accept ``loss_func(pred, target, **kwargs)``
    and return the element-wise loss without any reduction. The returned
    wrapper adds weighting and reduction, giving the signature
    ``loss_func(pred, target, weight=None, reduction='mean',
    avg_factor=None, **kwargs)``.

    :Example:

    >>> import torch
    >>> @weighted_loss
    ... def l1_loss(pred, target):
    ...     return (pred - target).abs()

    >>> pred = torch.Tensor([0, 2, 3])
    >>> target = torch.Tensor([1, 1, 1])
    >>> weight = torch.Tensor([1, 0, 1])

    >>> l1_loss(pred, target)
    tensor(1.3333)
    >>> l1_loss(pred, target, weight)
    tensor(1.)
    >>> l1_loss(pred, target, reduction='none')
    tensor([1., 1., 2.])
    >>> l1_loss(pred, target, weight, avg_factor=2)
    tensor(1.5000)
    """

    @functools.wraps(loss_func)
    def wrapper(pred,
                target,
                weight=None,
                reduction='mean',
                avg_factor=None,
                **kwargs):
        # element-wise loss first, then the shared weighting/reduction step
        elementwise = loss_func(pred, target, **kwargs)
        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)

    return wrapper
def convert_to_one_hot(targets: torch.Tensor, classes) -> torch.Tensor:
    """Convert target class indices to one-hot vectors.

    Args:
        targets (Tensor): The ground truth label of the prediction
            with shape (N, 1)
        classes (int): the number of classes.

    Returns:
        Tensor: One-hot targets with shape (N, classes) and dtype
        ``torch.long``, on the same device as ``targets``.
    """
    largest_index = torch.max(targets).item()
    assert (largest_index <
            classes), 'Class Index must be less than number of classes'

    num_samples = targets.shape[0]
    one_hot_targets = torch.zeros((num_samples, classes),
                                  dtype=torch.long,
                                  device=targets.device)
    # scatter_ writes a 1 at each row's class index and returns the tensor
    return one_hot_targets.scatter_(1, targets.long(), 1)
from .gap import GlobalAveragePooling
__all__ = ['GlobalAveragePooling']
import torch
import torch.nn as nn
from ..builder import NECKS
@NECKS.register_module()
class GlobalAveragePooling(nn.Module):
    """Global Average Pooling neck.

    The pooled output is flattened with `view` rather than `squeeze`,
    because `squeeze` would also drop a batch dimension of size 1, which
    can lead to unexpected errors.

    Args:
        dim (int): Dimensions of each sample channel, can be one of {1, 2, 3}.
            Default: 2
    """

    def __init__(self, dim=2):
        super().__init__()
        assert dim in [1, 2, 3], 'GlobalAveragePooling dim only support ' \
            f'{1, 2, 3}, get {dim} instead.'
        # Choose the adaptive pooling layer matching the spatial rank.
        if dim == 1:
            pool = nn.AdaptiveAvgPool1d(1)
        elif dim == 2:
            pool = nn.AdaptiveAvgPool2d((1, 1))
        else:
            pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.gap = pool

    def init_weights(self):
        """Nothing to initialise; the neck has no learnable weights here."""
        pass

    def forward(self, inputs):
        """Pool a tensor (or every tensor of a tuple) and flatten per sample.

        Raises:
            TypeError: If ``inputs`` is neither a tuple nor a tensor.
        """
        if isinstance(inputs, tuple):
            return tuple(self.gap(x).view(x.size(0), -1) for x in inputs)
        if isinstance(inputs, torch.Tensor):
            return self.gap(inputs).view(inputs.size(0), -1)
        raise TypeError('neck inputs should be tuple or torch.tensor')
from .augment.augments import Augments
from .channel_shuffle import channel_shuffle
from .helpers import to_2tuple, to_3tuple, to_4tuple, to_ntuple
from .inverted_residual import InvertedResidual
from .make_divisible import make_divisible
from .se_layer import SELayer
__all__ = [
'channel_shuffle', 'make_divisible', 'InvertedResidual', 'SELayer',
'to_ntuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'Augments'
]
from .augments import Augments
from .cutmix import BatchCutMixLayer
from .identity import Identity
from .mixup import BatchMixupLayer
__all__ = ['Augments', 'BatchCutMixLayer', 'Identity', 'BatchMixupLayer']
import random
import numpy as np
from .builder import build_augment
class Augments(object):
    """Data augments.

    We implement some data augmentation methods, such as mixup, cutmix.

    Args:
        augments_cfg (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict`):
            Config dict of augments

    Example:
        >>> augments_cfg = [
        ...     dict(type='BatchCutMix', alpha=1., num_classes=10, prob=0.5),
        ...     dict(type='BatchMixup', alpha=1., num_classes=10, prob=0.3)
        ... ]
        >>> augments = Augments(augments_cfg)
        >>> imgs = torch.randn(16, 3, 32, 32)
        >>> label = torch.randint(0, 10, (16, ))
        >>> imgs, label = augments(imgs, label)

    To decide which augmentation within Augments block is used
    the following rule is applied.
    We pick augmentation based on the probabilities. In the example above,
    we decide if we should use BatchCutMix with probability 0.5,
    BatchMixup 0.3. As Identity is not in augments_cfg, we use Identity with
    probability 1 - 0.5 - 0.3 = 0.2.
    """

    def __init__(self, augments_cfg):
        super(Augments, self).__init__()

        if isinstance(augments_cfg, dict):
            augments_cfg = [augments_cfg]

        assert len(augments_cfg) > 0, \
            'The length of augments_cfg should be positive.'
        self.augments = [build_augment(cfg) for cfg in augments_cfg]
        self.augment_probs = [aug.prob for aug in self.augments]

        # Probabilities are floats, so their sum is compared against 1 with
        # a small tolerance: e.g. 0.1 + 0.2 + 0.7 is not exactly 1.0.
        tol = 1e-8
        total_prob = sum(self.augment_probs)

        has_identity = any(cfg['type'] == 'Identity' for cfg in augments_cfg)
        if has_identity:
            assert abs(total_prob - 1.0) <= tol,\
                'The sum of augmentation probabilities should equal to 1,' \
                ' but got {:.2f}'.format(total_prob)
        else:
            assert total_prob <= 1.0 + tol,\
                'The sum of augmentation probabilities should less than or ' \
                'equal to 1, but got {:.2f}'.format(total_prob)
            # Whatever probability is left goes to an implicit Identity;
            # a remainder that is only rounding noise is ignored.
            identity_prob = 1 - total_prob
            if identity_prob > tol:
                num_classes = self.augments[0].num_classes
                self.augments += [
                    build_augment(
                        dict(
                            type='Identity',
                            num_classes=num_classes,
                            prob=identity_prob))
                ]
                self.augment_probs += [identity_prob]

    def __call__(self, img, gt_label):
        """Apply one randomly chosen augmentation to the batch.

        The augmentation is drawn according to ``augment_probs`` from a
        NumPy ``RandomState`` seeded via Python's ``random`` module.
        """
        if self.augments:
            random_state = np.random.RandomState(random.randint(0, 2**32 - 1))
            aug = random_state.choice(self.augments, p=self.augment_probs)
            return aug(img, gt_label)
        return img, gt_label
from mmcv.utils import Registry, build_from_cfg
# Registry that batch augmentations register themselves in.
AUGMENT = Registry('augment')


def build_augment(cfg, default_args=None):
    """Build an augmentation from ``cfg`` using the ``AUGMENT`` registry."""
    augment = build_from_cfg(cfg, AUGMENT, default_args)
    return augment
from abc import ABCMeta, abstractmethod
import numpy as np
import torch
import torch.nn.functional as F
from .builder import AUGMENT
class BaseCutMixLayer(object, metaclass=ABCMeta):
    """Base class for CutMixLayer.

    Args:
        alpha (float): Parameters for Beta distribution. Positive(>0)
        num_classes (int): The number of classes
        prob (float): Probability stored on the layer. It should be in
            range [0, 1]. Default to 1.0
        cutmix_minmax (List[float], optional): cutmix min/max image ratio.
            (as percent of image size). When cutmix_minmax is not None, we
            generate cutmix bounding-box using cutmix_minmax instead of alpha
        correct_lam (bool): Whether to apply lambda correction when cutmix bbox
            clipped by image borders. Default to True
    """

    def __init__(self,
                 alpha,
                 num_classes,
                 prob=1.0,
                 cutmix_minmax=None,
                 correct_lam=True):
        super().__init__()

        assert isinstance(alpha, float) and alpha > 0
        assert isinstance(num_classes, int)
        assert isinstance(prob, float) and 0.0 <= prob <= 1.0

        self.alpha = alpha
        self.num_classes = num_classes
        self.prob = prob
        self.cutmix_minmax = cutmix_minmax
        self.correct_lam = correct_lam

    def rand_bbox_minmax(self, img_shape, count=None):
        """Min-Max CutMix bounding-box, inspired by the Darknet cutmix
        implementation.

        A random rectangle is generated whose height and width are drawn
        between the min/max fractions of the image size. Typical values are
        around .2-.3 for min and .8-.9 for max.

        Args:
            img_shape (tuple): Image shape as tuple
            count (int, optional): Number of bbox to generate. Default to None
        """
        assert len(self.cutmix_minmax) == 2

        img_h, img_w = img_shape[-2:]
        min_ratio = self.cutmix_minmax[0]
        max_ratio = self.cutmix_minmax[1]

        # Box size first (height, then width), then its top-left corner.
        cut_h = np.random.randint(
            int(img_h * min_ratio), int(img_h * max_ratio), size=count)
        cut_w = np.random.randint(
            int(img_w * min_ratio), int(img_w * max_ratio), size=count)
        yl = np.random.randint(0, img_h - cut_h, size=count)
        xl = np.random.randint(0, img_w - cut_w, size=count)
        return yl, yl + cut_h, xl, xl + cut_w

    def rand_bbox(self, img_shape, lam, margin=0., count=None):
        """Standard CutMix bounding-box sized from the lambda value.

        Each side of the box is ``sqrt(1 - lam)`` times the corresponding
        image side. A border margin, given as a fraction of the box size,
        can be enforced to reduce the amount of box outside the image.

        Args:
            img_shape (tuple): Image shape as tuple
            lam (float): Cutmix lambda value
            margin (float): Percentage of bbox dimension to enforce as margin
                (reduce amount of box outside image). Default to 0.
            count (int, optional): Number of bbox to generate. Default to None
        """
        ratio = np.sqrt(1 - lam)
        img_h, img_w = img_shape[-2:]
        cut_h = int(img_h * ratio)
        cut_w = int(img_w * ratio)
        margin_y = int(margin * cut_h)
        margin_x = int(margin * cut_w)

        # Random box centre (y first, then x), then clip to the image.
        cy = np.random.randint(0 + margin_y, img_h - margin_y, size=count)
        cx = np.random.randint(0 + margin_x, img_w - margin_x, size=count)
        half_h = cut_h // 2
        half_w = cut_w // 2
        yl = np.clip(cy - half_h, 0, img_h)
        yh = np.clip(cy + half_h, 0, img_h)
        xl = np.clip(cx - half_w, 0, img_w)
        xh = np.clip(cx + half_w, 0, img_w)
        return yl, yh, xl, xh

    def cutmix_bbox_and_lam(self, img_shape, lam, count=None):
        """Generate bbox and apply lambda correction.

        Args:
            img_shape (tuple): Image shape as tuple
            lam (float): Cutmix lambda value
            count (int, optional): Number of bbox to generate. Default to None
        """
        use_minmax = self.cutmix_minmax is not None
        if use_minmax:
            bbox = self.rand_bbox_minmax(img_shape, count=count)
        else:
            bbox = self.rand_bbox(img_shape, lam, count=count)
        yl, yu, xl, xu = bbox

        if self.correct_lam or use_minmax:
            # Recompute lambda from the area the box actually covers.
            bbox_area = (yu - yl) * (xu - xl)
            lam = 1. - bbox_area / float(img_shape[-2] * img_shape[-1])
        return (yl, yu, xl, xu), lam

    @abstractmethod
    def cutmix(self, imgs, gt_label):
        """Apply CutMix to a batch; implemented by subclasses."""
        pass
@AUGMENT.register_module(name='BatchCutMix')
class BatchCutMixLayer(BaseCutMixLayer):
    """CutMix layer for batch CutMix."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def cutmix(self, img, gt_label):
        """Paste one random box from a shuffled copy of the batch.

        ``img`` is modified in place; the labels are mixed with the lambda
        returned by ``cutmix_bbox_and_lam``.

        Returns:
            tuple: The (modified) images and the mixed one-hot labels.
        """
        one_hot = F.one_hot(gt_label, num_classes=self.num_classes)
        lam = np.random.beta(self.alpha, self.alpha)
        perm = torch.randperm(img.size(0))

        box, lam = self.cutmix_bbox_and_lam(img.shape, lam)
        top, bottom, left, right = box
        img[:, :, top:bottom, left:right] = \
            img[perm, :, top:bottom, left:right]

        mixed_gt_label = lam * one_hot + (1 - lam) * one_hot[perm, :]
        return img, mixed_gt_label

    def __call__(self, img, gt_label):
        return self.cutmix(img, gt_label)
import torch.nn.functional as F
from .builder import AUGMENT
@AUGMENT.register_module(name='Identity')
class Identity(object):
    """Change gt_label to one_hot encoding and keep img as the same.

    Args:
        num_classes (int): The number of classes.
        prob (float): Probability stored on the augmentation. It should be
            in range [0, 1]. Default to 1.0
    """

    def __init__(self, num_classes, prob=1.0):
        super().__init__()

        assert isinstance(num_classes, int)
        assert isinstance(prob, float) and 0.0 <= prob <= 1.0

        self.num_classes = num_classes
        self.prob = prob

    def one_hot(self, gt_label):
        """One-hot encode ``gt_label`` over ``num_classes`` classes."""
        return F.one_hot(gt_label, num_classes=self.num_classes)

    def __call__(self, img, gt_label):
        encoded = self.one_hot(gt_label)
        return img, encoded
from abc import ABCMeta, abstractmethod
import numpy as np
import torch
import torch.nn.functional as F
from .builder import AUGMENT
class BaseMixupLayer(object, metaclass=ABCMeta):
    """Base class for MixupLayer.

    Args:
        alpha (float): Parameters for Beta distribution. Must be positive.
        num_classes (int): The number of classes.
        prob (float): MixUp probability. It should be in range [0, 1].
            Default to 1.0
    """

    def __init__(self, alpha, num_classes, prob=1.0):
        super().__init__()

        # Validate before storing anything on the instance.
        assert isinstance(alpha, float) and alpha > 0
        assert isinstance(num_classes, int)
        assert isinstance(prob, float) and 0.0 <= prob <= 1.0

        self.alpha = alpha
        self.num_classes = num_classes
        self.prob = prob

    @abstractmethod
    def mixup(self, imgs, gt_label):
        """Apply mixup to a batch; implemented by subclasses."""
        pass
@AUGMENT.register_module(name='BatchMixup')
class BatchMixupLayer(BaseMixupLayer):
    """Mixup applied to a whole batch at once.

    One mixing ratio and one random permutation of the batch are drawn per
    call; each sample is blended with the sample the permutation pairs it
    with, and the one-hot labels are blended with the same ratio.
    """

    def __init__(self, *args, **kwargs):
        # Construction is fully delegated to the base Mixup layer.
        super().__init__(*args, **kwargs)

    def mixup(self, img, gt_label):
        """Blend the images and the labels of one batch.

        Args:
            img (Tensor): Batch of images; the first axis is the batch.
            gt_label (Tensor): Class indices, one-hot encoded with
                ``self.num_classes`` classes.

        Returns:
            tuple: ``(mixed_img, mixed_gt_label)``.
        """
        targets = F.one_hot(gt_label, num_classes=self.num_classes)
        ratio = np.random.beta(self.alpha, self.alpha)
        perm = torch.randperm(img.size(0))

        blended_img = ratio * img + (1 - ratio) * img[perm, :]
        blended_targets = ratio * targets + (1 - ratio) * targets[perm, :]
        return blended_img, blended_targets

    def __call__(self, img, gt_label):
        """Shortcut for :meth:`mixup`."""
        return self.mixup(img, gt_label)
import torch
def channel_shuffle(x, groups):
    """Channel Shuffle operation.

    This function enables cross-group information flow for multiple groups
    convolution layers. The channels are split into ``groups`` groups and
    interleaved, so that output channel ``i * groups + g`` is input channel
    ``g * channels_per_group + i``.

    Args:
        x (Tensor): The input tensor with four dimensions, unpacked as
            (batch_size, num_channels, height, width).
        groups (int): The number of groups to divide the input tensor
            in the channel dimension.

    Returns:
        Tensor: The output tensor after channel shuffle operation, with the
        same shape as ``x``.

    Raises:
        AssertionError: If ``num_channels`` is not divisible by ``groups``.
    """
    batch_size, num_channels, height, width = x.size()
    assert (num_channels % groups == 0), ('num_channels should be '
                                          'divisible by groups')
    channels_per_group = num_channels // groups

    # Expose the (groups, channels_per_group) structure, swap those two axes
    # and make the result contiguous so that it can be viewed flat again.
    x = x.view(batch_size, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()

    # Pass the channel count explicitly instead of -1: for a tensor without
    # elements (e.g. an empty batch) -1 is ambiguous and ``view`` raises.
    x = x.view(batch_size, num_channels, height, width)
    return x
import collections.abc
from itertools import repeat
# From PyTorch internals
def _ntuple(n):
    """Create a converter that expands a single value into an ``n``-tuple.

    Args:
        n (int): Length of the tuple the converter builds.

    Returns:
        callable: A function taking one argument. An argument that is
        already iterable (this includes strings) is returned unchanged,
        whatever its length; anything else is repeated ``n`` times.
    """

    def parse(x):
        already_iterable = isinstance(x, collections.abc.Iterable)
        return x if already_iterable else tuple(repeat(x, n))

    return parse


# Ready-made converters for the most common tuple lengths.
to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
# Public alias of the factory for any other length.
to_ntuple = _ntuple
import torch.utils.checkpoint as cp
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from .se_layer import SELayer
# class InvertedResidual(nn.Module):
class InvertedResidual(BaseModule):
    """Inverted Residual Block.

    The block chains an optional 1x1 expand convolution, a depthwise
    convolution, an optional SE layer and a final 1x1 convolution. The input
    is added to the result when ``stride == 1`` and
    ``in_channels == out_channels``.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        mid_channels (int): The input (and output) channels of the depthwise
            convolution.
        kernel_size (int): The kernel size of the depthwise convolution.
            Its padding is ``kernel_size // 2``. Default: 3.
        stride (int): The stride of the depthwise convolution, 1 or 2.
            Default: 1.
        se_cfg (dict): Keyword arguments forwarded to ``SELayer``.
            Default: None, which means no se layer.
        with_expand_conv (bool): Use expand conv or not. If set False,
            mid_channels must be the same with in_channels.
            Default: True.
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed. It only takes
            effect when the input requires grad. Default: False.
        init_cfg (dict, optional): Initialization config handed to
            ``BaseModule``. Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 mid_channels,
                 kernel_size=3,
                 stride=1,
                 se_cfg=None,
                 with_expand_conv=True,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False,
                 init_cfg=None):
        super().__init__(init_cfg)

        self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
        assert stride in [1, 2]
        self.with_cp = with_cp
        self.with_se = se_cfg is not None
        self.with_expand_conv = with_expand_conv

        if self.with_se:
            assert isinstance(se_cfg, dict)

        # Every convolution of the block is built with the same three configs.
        shared_cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # Sub-modules are created in forward order: expand -> depthwise ->
        # se -> linear.
        if with_expand_conv:
            self.expand_conv = ConvModule(
                in_channels=in_channels,
                out_channels=mid_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                **shared_cfg)
        else:
            # Without expansion the depthwise conv consumes the input as is.
            assert mid_channels == in_channels

        self.depthwise_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=mid_channels,
            **shared_cfg)

        if self.with_se:
            self.se = SELayer(**se_cfg)

        # NOTE(review): this conv is called "linear" but it receives
        # ``act_cfg`` like the others, so it presumably ends with an
        # activation - confirm that this is intended.
        self.linear_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            **shared_cfg)

    def forward(self, x):
        """Run the block on ``x``.

        Args:
            x (Tensor): The input tensor.

        Returns:
            Tensor: The output tensor.
        """

        def _inner_forward(inputs):
            feats = inputs
            if self.with_expand_conv:
                feats = self.expand_conv(feats)
            feats = self.depthwise_conv(feats)
            if self.with_se:
                feats = self.se(feats)
            feats = self.linear_conv(feats)
            return inputs + feats if self.with_res_shortcut else feats

        # Checkpointing is only used when the input takes part in autograd.
        if self.with_cp and x.requires_grad:
            return cp.checkpoint(_inner_forward, x)
        return _inner_forward(x)
def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
    """Round a channel number to a multiple of ``divisor``.

    ``value`` is rounded to the nearest multiple of ``divisor`` (for
    non-negative values a tie goes to the larger multiple) and the result is
    raised to ``min_value`` if it is smaller. When the result is below
    ``min_ratio * value`` one more ``divisor`` is added to it.

    Args:
        value (int): The original channel number.
        divisor (int): The divisor to fully divide the channel number.
        min_value (int, optional): The minimum value of the output channel.
            Default: None, means that the minimum value equal to the divisor.
        min_ratio (float): The minimum ratio of the rounded channel
            number to the original channel number. Default: 0.9.

    Returns:
        int: The modified output channel number
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(value + divisor / 2) // divisor * divisor
    channels = max(lower_bound, nearest_multiple)

    # Rounding must not shrink the value by more than (1 - min_ratio).
    if channels < min_ratio * value:
        channels += divisor
    return channels
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment