Commit 322546ff authored by sunxx1's avatar sunxx1
Browse files

Merge branch 'add_Recommendation' into 'main'

添加openmmlab测试用例

See merge request dcutoolkit/deeplearing/dlexamples_new!32
parents 1f4ba993 8c867a92
from .augments import Augments
from .cutmix import BatchCutMixLayer
from .identity import Identity
from .mixup import BatchMixupLayer
__all__ = ['Augments', 'BatchCutMixLayer', 'Identity', 'BatchMixupLayer']
import random
import numpy as np
from .builder import build_augment
class Augments(object):
"""Data augments.
We implement some data augmentation methods, such as mixup, cutmix.
Args:
augments_cfg (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict`):
Config dict of augments
Example:
>>> augments_cfg = [
dict(type='BatchCutMix', alpha=1., num_classes=10, prob=0.5),
dict(type='BatchMixup', alpha=1., num_classes=10, prob=0.3)
]
>>> augments = Augments(augments_cfg)
>>> imgs = torch.randn(16, 3, 32, 32)
>>> label = torch.randint(0, 10, (16, ))
>>> imgs, label = augments(imgs, label)
To decide which augmentation within Augments block is used
the following rule is applied.
We pick augmentation based on the probabilities. In the example above,
we decide if we should use BatchCutMix with probability 0.5,
BatchMixup 0.3. As Identity is not in augments_cfg, we use Identity with
probability 1 - 0.5 - 0.3 = 0.2.
"""
def __init__(self, augments_cfg):
super(Augments, self).__init__()
if isinstance(augments_cfg, dict):
augments_cfg = [augments_cfg]
assert len(augments_cfg) > 0, \
'The length of augments_cfg should be positive.'
self.augments = [build_augment(cfg) for cfg in augments_cfg]
self.augment_probs = [aug.prob for aug in self.augments]
has_identity = any([cfg['type'] == 'Identity' for cfg in augments_cfg])
if has_identity:
assert sum(self.augment_probs) == 1.0,\
'The sum of augmentation probabilities should equal to 1,' \
' but got {:.2f}'.format(sum(self.augment_probs))
else:
assert sum(self.augment_probs) <= 1.0,\
'The sum of augmentation probabilities should less than or ' \
'equal to 1, but got {:.2f}'.format(sum(self.augment_probs))
identity_prob = 1 - sum(self.augment_probs)
if identity_prob > 0:
num_classes = self.augments[0].num_classes
self.augments += [
build_augment(
dict(
type='Identity',
num_classes=num_classes,
prob=identity_prob))
]
self.augment_probs += [identity_prob]
def __call__(self, img, gt_label):
if self.augments:
random_state = np.random.RandomState(random.randint(0, 2**32 - 1))
aug = random_state.choice(self.augments, p=self.augment_probs)
return aug(img, gt_label)
return img, gt_label
from mmcv.utils import Registry, build_from_cfg
AUGMENT = Registry('augment')
def build_augment(cfg, default_args=None):
return build_from_cfg(cfg, AUGMENT, default_args)
from abc import ABCMeta, abstractmethod
import numpy as np
import torch
import torch.nn.functional as F
from .builder import AUGMENT
class BaseCutMixLayer(object, metaclass=ABCMeta):
"""Base class for CutMixLayer.
Args:
alpha (float): Parameters for Beta distribution. Positive(>0)
num_classes (int): The number of classes
prob (float): MixUp probability. It should be in range [0, 1].
Default to 1.0
cutmix_minmax (List[float], optional): cutmix min/max image ratio.
(as percent of image size). When cutmix_minmax is not None, we
generate cutmix bounding-box using cutmix_minmax instead of alpha
correct_lam (bool): Whether to apply lambda correction when cutmix bbox
clipped by image borders. Default to True
"""
def __init__(self,
alpha,
num_classes,
prob=1.0,
cutmix_minmax=None,
correct_lam=True):
super(BaseCutMixLayer, self).__init__()
assert isinstance(alpha, float) and alpha > 0
assert isinstance(num_classes, int)
assert isinstance(prob, float) and 0.0 <= prob <= 1.0
self.alpha = alpha
self.num_classes = num_classes
self.prob = prob
self.cutmix_minmax = cutmix_minmax
self.correct_lam = correct_lam
def rand_bbox_minmax(self, img_shape, count=None):
"""Min-Max CutMix bounding-box Inspired by Darknet cutmix
implementation. It generates a random rectangular bbox based on min/max
percent values applied to each dimension of the input image.
Typical defaults for minmax are usually in the .2-.3 for min and
.8-.9 range for max.
Args:
img_shape (tuple): Image shape as tuple
count (int, optional): Number of bbox to generate. Default to None
"""
assert len(self.cutmix_minmax) == 2
img_h, img_w = img_shape[-2:]
cut_h = np.random.randint(
int(img_h * self.cutmix_minmax[0]),
int(img_h * self.cutmix_minmax[1]),
size=count)
cut_w = np.random.randint(
int(img_w * self.cutmix_minmax[0]),
int(img_w * self.cutmix_minmax[1]),
size=count)
yl = np.random.randint(0, img_h - cut_h, size=count)
xl = np.random.randint(0, img_w - cut_w, size=count)
yu = yl + cut_h
xu = xl + cut_w
return yl, yu, xl, xu
def rand_bbox(self, img_shape, lam, margin=0., count=None):
"""Standard CutMix bounding-box that generates a random square bbox
based on lambda value. This implementation includes support for
enforcing a border margin as percent of bbox dimensions.
Args:
img_shape (tuple): Image shape as tuple
lam (float): Cutmix lambda value
margin (float): Percentage of bbox dimension to enforce as margin
(reduce amount of box outside image). Default to 0.
count (int, optional): Number of bbox to generate. Default to None
"""
ratio = np.sqrt(1 - lam)
img_h, img_w = img_shape[-2:]
cut_h, cut_w = int(img_h * ratio), int(img_w * ratio)
margin_y, margin_x = int(margin * cut_h), int(margin * cut_w)
cy = np.random.randint(0 + margin_y, img_h - margin_y, size=count)
cx = np.random.randint(0 + margin_x, img_w - margin_x, size=count)
yl = np.clip(cy - cut_h // 2, 0, img_h)
yh = np.clip(cy + cut_h // 2, 0, img_h)
xl = np.clip(cx - cut_w // 2, 0, img_w)
xh = np.clip(cx + cut_w // 2, 0, img_w)
return yl, yh, xl, xh
def cutmix_bbox_and_lam(self, img_shape, lam, count=None):
"""Generate bbox and apply lambda correction.
Args:
img_shape (tuple): Image shape as tuple
lam (float): Cutmix lambda value
count (int, optional): Number of bbox to generate. Default to None
"""
if self.cutmix_minmax is not None:
yl, yu, xl, xu = self.rand_bbox_minmax(img_shape, count=count)
else:
yl, yu, xl, xu = self.rand_bbox(img_shape, lam, count=count)
if self.correct_lam or self.cutmix_minmax is not None:
bbox_area = (yu - yl) * (xu - xl)
lam = 1. - bbox_area / float(img_shape[-2] * img_shape[-1])
return (yl, yu, xl, xu), lam
@abstractmethod
def cutmix(self, imgs, gt_label):
pass
@AUGMENT.register_module(name='BatchCutMix')
class BatchCutMixLayer(BaseCutMixLayer):
"""CutMix layer for batch CutMix."""
def __init__(self, *args, **kwargs):
super(BatchCutMixLayer, self).__init__(*args, **kwargs)
def cutmix(self, img, gt_label):
one_hot_gt_label = F.one_hot(gt_label, num_classes=self.num_classes)
lam = np.random.beta(self.alpha, self.alpha)
batch_size = img.size(0)
index = torch.randperm(batch_size)
(bby1, bby2, bbx1,
bbx2), lam = self.cutmix_bbox_and_lam(img.shape, lam)
img[:, :, bby1:bby2, bbx1:bbx2] = \
img[index, :, bby1:bby2, bbx1:bbx2]
mixed_gt_label = lam * one_hot_gt_label + (
1 - lam) * one_hot_gt_label[index, :]
return img, mixed_gt_label
def __call__(self, img, gt_label):
return self.cutmix(img, gt_label)
import torch.nn.functional as F
from .builder import AUGMENT
@AUGMENT.register_module(name='Identity')
class Identity(object):
"""Change gt_label to one_hot encoding and keep img as the same.
Args:
num_classes (int): The number of classes.
prob (float): MixUp probability. It should be in range [0, 1].
Default to 1.0
"""
def __init__(self, num_classes, prob=1.0):
super(Identity, self).__init__()
assert isinstance(num_classes, int)
assert isinstance(prob, float) and 0.0 <= prob <= 1.0
self.num_classes = num_classes
self.prob = prob
def one_hot(self, gt_label):
return F.one_hot(gt_label, num_classes=self.num_classes)
def __call__(self, img, gt_label):
return img, self.one_hot(gt_label)
from abc import ABCMeta, abstractmethod
import numpy as np
import torch
import torch.nn.functional as F
from .builder import AUGMENT
class BaseMixupLayer(object, metaclass=ABCMeta):
"""Base class for MixupLayer.
Args:
alpha (float): Parameters for Beta distribution.
num_classes (int): The number of classes.
prob (float): MixUp probability. It should be in range [0, 1].
Default to 1.0
"""
def __init__(self, alpha, num_classes, prob=1.0):
super(BaseMixupLayer, self).__init__()
assert isinstance(alpha, float) and alpha > 0
assert isinstance(num_classes, int)
assert isinstance(prob, float) and 0.0 <= prob <= 1.0
self.alpha = alpha
self.num_classes = num_classes
self.prob = prob
@abstractmethod
def mixup(self, imgs, gt_label):
pass
@AUGMENT.register_module(name='BatchMixup')
class BatchMixupLayer(BaseMixupLayer):
"""Mixup layer for batch mixup."""
def __init__(self, *args, **kwargs):
super(BatchMixupLayer, self).__init__(*args, **kwargs)
def mixup(self, img, gt_label):
one_hot_gt_label = F.one_hot(gt_label, num_classes=self.num_classes)
lam = np.random.beta(self.alpha, self.alpha)
batch_size = img.size(0)
index = torch.randperm(batch_size)
mixed_img = lam * img + (1 - lam) * img[index, :]
mixed_gt_label = lam * one_hot_gt_label + (
1 - lam) * one_hot_gt_label[index, :]
return mixed_img, mixed_gt_label
def __call__(self, img, gt_label):
return self.mixup(img, gt_label)
import torch
def channel_shuffle(x, groups):
"""Channel Shuffle operation.
This function enables cross-group information flow for multiple groups
convolution layers.
Args:
x (Tensor): The input tensor.
groups (int): The number of groups to divide the input tensor
in the channel dimension.
Returns:
Tensor: The output tensor after channel shuffle operation.
"""
batch_size, num_channels, height, width = x.size()
assert (num_channels % groups == 0), ('num_channels should be '
'divisible by groups')
channels_per_group = num_channels // groups
x = x.view(batch_size, groups, channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(batch_size, -1, height, width)
return x
import collections.abc
from itertools import repeat
# From PyTorch internals
def _ntuple(n):
def parse(x):
if isinstance(x, collections.abc.Iterable):
return x
return tuple(repeat(x, n))
return parse
to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple
import torch.utils.checkpoint as cp
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from .se_layer import SELayer
# class InvertedResidual(nn.Module):
class InvertedResidual(BaseModule):
"""Inverted Residual Block.
Args:
in_channels (int): The input channels of this Module.
out_channels (int): The output channels of this Module.
mid_channels (int): The input channels of the depthwise convolution.
kernel_size (int): The kernal size of the depthwise convolution.
Default: 3.
stride (int): The stride of the depthwise convolution. Default: 1.
se_cfg (dict): Config dict for se layer. Defaul: None, which means no
se layer.
with_expand_conv (bool): Use expand conv or not. If set False,
mid_channels must be the same with in_channels.
Default: True.
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer.
Default: dict(type='BN').
act_cfg (dict): Config dict for activation layer.
Default: dict(type='ReLU').
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Default: False.
Returns:
Tensor: The output tensor.
"""
def __init__(self,
in_channels,
out_channels,
mid_channels,
kernel_size=3,
stride=1,
se_cfg=None,
with_expand_conv=True,
conv_cfg=None,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
with_cp=False,
init_cfg=None):
super(InvertedResidual, self).__init__(init_cfg)
self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
assert stride in [1, 2]
self.with_cp = with_cp
self.with_se = se_cfg is not None
self.with_expand_conv = with_expand_conv
if self.with_se:
assert isinstance(se_cfg, dict)
if not self.with_expand_conv:
assert mid_channels == in_channels
if self.with_expand_conv:
self.expand_conv = ConvModule(
in_channels=in_channels,
out_channels=mid_channels,
kernel_size=1,
stride=1,
padding=0,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
self.depthwise_conv = ConvModule(
in_channels=mid_channels,
out_channels=mid_channels,
kernel_size=kernel_size,
stride=stride,
padding=kernel_size // 2,
groups=mid_channels,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
if self.with_se:
self.se = SELayer(**se_cfg)
self.linear_conv = ConvModule(
in_channels=mid_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
def forward(self, x):
def _inner_forward(x):
out = x
if self.with_expand_conv:
out = self.expand_conv(out)
out = self.depthwise_conv(out)
if self.with_se:
out = self.se(out)
out = self.linear_conv(out)
if self.with_res_shortcut:
return x + out
else:
return out
if self.with_cp and x.requires_grad:
out = cp.checkpoint(_inner_forward, x)
else:
out = _inner_forward(x)
return out
def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
"""Make divisible function.
This function rounds the channel number down to the nearest value that can
be divisible by the divisor.
Args:
value (int): The original channel number.
divisor (int): The divisor to fully divide the channel number.
min_value (int, optional): The minimum value of the output channel.
Default: None, means that the minimum value equal to the divisor.
min_ratio (float): The minimum ratio of the rounded channel
number to the original channel number. Default: 0.9.
Returns:
int: The modified output channel number
"""
if min_value is None:
min_value = divisor
new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than (1-min_ratio).
if new_value < min_ratio * value:
new_value += divisor
return new_value
import mmcv
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
# class SELayer(nn.Module):
class SELayer(BaseModule):
"""Squeeze-and-Excitation Module.
Args:
channels (int): The input (and output) channels of the SE layer.
ratio (int): Squeeze ratio in SELayer, the intermediate channel will be
``int(channels/ratio)``. Default: 16.
conv_cfg (None or dict): Config dict for convolution layer.
Default: None, which means using conv2d.
act_cfg (dict or Sequence[dict]): Config dict for activation layer.
If act_cfg is a dict, two activation layers will be configurated
by this dict. If act_cfg is a sequence of dicts, the first
activation layer will be configurated by the first dict and the
second activation layer will be configurated by the second dict.
Default: (dict(type='ReLU'), dict(type='Sigmoid'))
"""
def __init__(self,
channels,
ratio=16,
conv_cfg=None,
act_cfg=(dict(type='ReLU'), dict(type='Sigmoid')),
init_cfg=None):
super(SELayer, self).__init__(init_cfg)
if isinstance(act_cfg, dict):
act_cfg = (act_cfg, act_cfg)
assert len(act_cfg) == 2
assert mmcv.is_tuple_of(act_cfg, dict)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.conv1 = ConvModule(
in_channels=channels,
out_channels=int(channels / ratio),
kernel_size=1,
stride=1,
conv_cfg=conv_cfg,
act_cfg=act_cfg[0])
self.conv2 = ConvModule(
in_channels=int(channels / ratio),
out_channels=channels,
kernel_size=1,
stride=1,
conv_cfg=conv_cfg,
act_cfg=act_cfg[1])
def forward(self, x):
out = self.global_avgpool(x)
out = self.conv1(out)
out = self.conv2(out)
return x * out
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment