添加openmmlab测试用例

85529f35 · unknown · b21b0c01 · 85529f35 · 85529f35 · 85529f35
Commit 85529f35 authored Jul 30, 2022 by unknown
20 changed files
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/__init__.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/__init__.py
+from .accuracy import Accuracy, accuracy
+from .asymmetric_loss import AsymmetricLoss, asymmetric_loss
+from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
+                                 cross_entropy)
+from .focal_loss import FocalLoss, sigmoid_focal_loss
+from .label_smooth_loss import LabelSmoothLoss
+from .utils import (convert_to_one_hot, reduce_loss, weight_reduce_loss,
+                    weighted_loss)
+
+# Names re-exported as the public API of this losses package; every entry is
+# imported from a sibling module above.
+__all__ = [
+    'accuracy', 'Accuracy', 'asymmetric_loss', 'AsymmetricLoss',
+    'cross_entropy', 'binary_cross_entropy', 'CrossEntropyLoss', 'reduce_loss',
+    'weight_reduce_loss', 'LabelSmoothLoss', 'weighted_loss', 'FocalLoss',
+    'sigmoid_focal_loss', 'convert_to_one_hot'
+]
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/accuracy.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/accuracy.py
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+def accuracy_numpy(pred, target, topk=1, thrs=None):
+    """Compute top-k accuracy (in percent) for numpy predictions.
+
+    Args:
+        pred (np.ndarray): Prediction scores, indexed as (sample, class).
+        target (np.ndarray): Ground-truth class index of each sample.
+        topk (int | tuple[int]): The k value(s) to evaluate. Defaults to 1.
+        thrs (float | tuple[float], optional): Score threshold(s); a hit only
+            counts when its score is strictly larger than the threshold.
+            ``None`` is treated as ``0.0``. Defaults to None.
+
+    Returns:
+        list: One entry per value in ``topk``. Each entry is a single
+            accuracy if ``thrs`` is a float (or None), otherwise a list with
+            one accuracy per threshold.
+
+    Raises:
+        TypeError: If ``thrs`` is neither a float nor a tuple.
+    """
+    # The default ``topk=1`` is an int, which ``max()`` and the loop below
+    # cannot iterate over; normalise it to a 1-tuple first.
+    if isinstance(topk, int):
+        topk = (topk, )
+
+    if thrs is None:
+        thrs = 0.0
+    if isinstance(thrs, float):
+        thrs = (thrs, )
+        res_single = True
+    elif isinstance(thrs, tuple):
+        res_single = False
+    else:
+        raise TypeError(
+            f'thrs should be float or tuple, but got {type(thrs)}.')
+
+    res = []
+    maxk = max(topk)
+    num = pred.shape[0]
+    # Best ``maxk`` labels/scores per sample, ordered from best to worst.
+    pred_label = pred.argsort(axis=1)[:, -maxk:][:, ::-1]
+    pred_score = np.sort(pred, axis=1)[:, -maxk:][:, ::-1]
+
+    for k in topk:
+        correct_k = pred_label[:, :k] == target.reshape(-1, 1)
+        res_thr = []
+        for thr in thrs:
+            # Only prediction values larger than thr are counted as correct
+            _correct_k = correct_k & (pred_score[:, :k] > thr)
+            # A sample is a hit if any of its k best predictions matches.
+            _correct_k = np.logical_or.reduce(_correct_k, axis=1)
+            res_thr.append(_correct_k.sum() * 100. / num)
+        if res_single:
+            res.append(res_thr[0])
+        else:
+            res.append(res_thr)
+    return res
+
+
+def accuracy_torch(pred, target, topk=1, thrs=None):
+    """Compute top-k accuracy (in percent) for torch predictions.
+
+    Args:
+        pred (torch.Tensor): Prediction scores, indexed as (sample, class).
+        target (torch.Tensor): Ground-truth class index of each sample.
+        topk (int | tuple[int]): The k value(s) to evaluate. Defaults to 1.
+        thrs (float | tuple[float], optional): Score threshold(s); a hit only
+            counts when its score is strictly larger than the threshold.
+            ``None`` is treated as ``0.0``. Defaults to None.
+
+    Returns:
+        list: One entry per value in ``topk``. Each entry is a 1-element
+            tensor if ``thrs`` is a float (or None), otherwise a list with
+            one 1-element tensor per threshold.
+
+    Raises:
+        TypeError: If ``thrs`` is neither a float nor a tuple.
+    """
+    # The default ``topk=1`` is an int, which ``max()`` and the loop below
+    # cannot iterate over; normalise it to a 1-tuple first.
+    if isinstance(topk, int):
+        topk = (topk, )
+
+    if thrs is None:
+        thrs = 0.0
+    if isinstance(thrs, float):
+        thrs = (thrs, )
+        res_single = True
+    elif isinstance(thrs, tuple):
+        res_single = False
+    else:
+        raise TypeError(
+            f'thrs should be float or tuple, but got {type(thrs)}.')
+
+    res = []
+    maxk = max(topk)
+    num = pred.size(0)
+    pred_score, pred_label = pred.topk(maxk, dim=1)
+    # Work in (maxk, N) layout so that slicing the first k rows selects the
+    # k best predictions of every sample.
+    pred_label = pred_label.t()
+    pred_score = pred_score.t()
+    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
+    for k in topk:
+        res_thr = []
+        for thr in thrs:
+            # Only prediction values larger than thr are counted as correct
+            _correct = correct & (pred_score > thr)
+            correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
+            res_thr.append(correct_k.mul_(100. / num))
+        if res_single:
+            res.append(res_thr[0])
+        else:
+            res.append(res_thr)
+    return res
+
+
+def accuracy(pred, target, topk=1, thrs=None):
+    """Calculate accuracy according to the prediction and target.
+
+    Dispatches to the torch or numpy implementation depending on the input
+    types; ``pred`` and ``target`` must be of the same kind.
+
+    Args:
+        pred (torch.Tensor | np.array): The model prediction.
+        target (torch.Tensor | np.array): The target of each prediction.
+        topk (int | tuple[int]): If the predictions in ``topk``
+            matches the target, the predictions will be regarded as
+            correct ones. Defaults to 1.
+        thrs (float | tuple[float], optional): Predictions with scores under
+            the thresholds are considered negative. Defaults to None.
+
+    Returns:
+        float | list[float] | list[list[float]]: If ``topk`` is a single
+            integer, the result for that k is returned directly (a single
+            value or a list, depending on whether ``thrs`` is a single
+            float). If ``topk`` is a tuple, a list with one result per
+            entry of ``topk`` is returned.
+
+    Raises:
+        TypeError: If ``pred`` and ``target`` are not both torch tensors or
+            both numpy arrays.
+    """
+    assert isinstance(topk, (int, tuple))
+    return_single = isinstance(topk, int)
+    if return_single:
+        topk = (topk, )
+
+    # Select the backend matching the input container type.
+    if isinstance(pred, torch.Tensor) and isinstance(target, torch.Tensor):
+        compute = accuracy_torch
+    elif isinstance(pred, np.ndarray) and isinstance(target, np.ndarray):
+        compute = accuracy_numpy
+    else:
+        raise TypeError(
+            f'pred and target should both be torch.Tensor or np.ndarray, '
+            f'but got {type(pred)} and {type(target)}.')
+
+    res = compute(pred, target, topk, thrs)
+    if return_single:
+        return res[0]
+    return res
+
+
+class Accuracy(nn.Module):
+
+    def __init__(self, topk=(1, )):
+        """Create a module that reports top-k accuracy.
+
+        Args:
+            topk (tuple): The k values used as accuracy criteria.
+                Defaults to (1,).
+        """
+        super(Accuracy, self).__init__()
+        self.topk = topk
+
+    def forward(self, pred, target):
+        """Compute the accuracy of ``pred`` against ``target``.
+
+        Args:
+            pred (torch.Tensor): Prediction of models.
+            target (torch.Tensor): Target for each prediction.
+
+        Returns:
+            list[float]: The accuracies under the configured topk criteria.
+        """
+        criteria = self.topk
+        return accuracy(pred, target, criteria)
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/asymmetric_loss.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/asymmetric_loss.py
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+from .utils import weight_reduce_loss
+
+
+def asymmetric_loss(pred,
+                    target,
+                    weight=None,
+                    gamma_pos=1.0,
+                    gamma_neg=4.0,
+                    clip=0.05,
+                    reduction='mean',
+                    avg_factor=None):
+    """Asymmetric loss on sigmoid-activated predictions.
+
+    Please refer to the `paper <https://arxiv.org/abs/2009.14119>`_ for
+    details.
+
+    Args:
+        pred (torch.Tensor): The prediction (logits) with shape (N, *).
+        target (torch.Tensor): The ground truth label of the prediction with
+            shape (N, *).
+        weight (torch.Tensor, optional): Sample-wise loss weight with shape
+            (N, ). Defaults to None.
+        gamma_pos (float): Positive focusing parameter. Defaults to 1.0.
+        gamma_neg (float): Negative focusing parameter. We usually set
+            gamma_neg > gamma_pos. Defaults to 4.0.
+        clip (float, optional): Probability margin added to the negative
+            probability before it is clamped to 1. Defaults to 0.05.
+        reduction (str): The method used to reduce the loss.
+            Options are "none", "mean" and "sum". If reduction is 'none',
+            loss is same shape as pred and label. Defaults to 'mean'.
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+
+    Returns:
+        torch.Tensor: Loss.
+    """
+    assert pred.shape == target.shape, \
+        'pred and target should be in the same shape.'
+
+    eps = 1e-8
+    prob = pred.sigmoid()
+    target = target.type_as(pred)
+    neg_target = 1 - target
+
+    # Probability assigned to the negative class, optionally shifted by the
+    # margin and capped at 1.
+    if clip and clip > 0:
+        neg_prob = (1 - prob + clip).clamp(max=1)
+    else:
+        neg_prob = 1 - prob
+    pt = neg_prob * neg_target + prob * target
+
+    # Per-element focusing exponent: gamma_pos on positives, gamma_neg on
+    # negatives.
+    exponent = gamma_pos * target + gamma_neg * neg_target
+    asymmetric_weight = (1 - pt).pow(exponent)
+    # eps keeps the logarithm finite when pt reaches 0.
+    loss = -torch.log(pt.clamp(min=eps)) * asymmetric_weight
+
+    if weight is not None:
+        assert weight.dim() == 1
+        weight = weight.float()
+        # Turn (N,) into (N, 1) so it broadcasts over the class dimension.
+        weight = weight.reshape(-1, 1) if pred.dim() > 1 else weight
+    return weight_reduce_loss(loss, weight, reduction, avg_factor)
+
+
+@LOSSES.register_module()
+class AsymmetricLoss(nn.Module):
+    """Module wrapper around :func:`asymmetric_loss`.
+
+    Args:
+        gamma_pos (float): Positive focusing parameter.
+            Defaults to 0.0.
+        gamma_neg (float): Negative focusing parameter. We
+            usually set gamma_neg > gamma_pos. Defaults to 4.0.
+        clip (float, optional): Probability margin. Defaults to 0.05.
+        reduction (str): The method used to reduce the loss into
+            a scalar. Defaults to 'mean'.
+        loss_weight (float): Weight of loss. Defaults to 1.0.
+    """
+
+    def __init__(self,
+                 gamma_pos=0.0,
+                 gamma_neg=4.0,
+                 clip=0.05,
+                 reduction='mean',
+                 loss_weight=1.0):
+        super().__init__()
+        self.gamma_pos, self.gamma_neg = gamma_pos, gamma_neg
+        self.clip = clip
+        self.reduction = reduction
+        self.loss_weight = loss_weight
+
+    def forward(self,
+                pred,
+                target,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None):
+        """Compute the weighted asymmetric loss.
+
+        ``reduction_override``, when given, replaces the reduction chosen
+        at construction time for this call only.
+        """
+        assert reduction_override in (None, 'none', 'mean', 'sum')
+        # Any accepted non-None override is a non-empty string, so ``or``
+        # falls back to the configured reduction only for None.
+        reduction = reduction_override or self.reduction
+        unscaled = asymmetric_loss(
+            pred,
+            target,
+            weight,
+            gamma_pos=self.gamma_pos,
+            gamma_neg=self.gamma_neg,
+            clip=self.clip,
+            reduction=reduction,
+            avg_factor=avg_factor)
+        return self.loss_weight * unscaled
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/cross_entropy_loss.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/cross_entropy_loss.py
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+from .utils import weight_reduce_loss
+
+
+def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None):
+    """Calculate the CrossEntropy loss.
+
+    Args:
+        pred (torch.Tensor): The prediction with shape (N, C), C is the number
+            of classes.
+        label (torch.Tensor): The gt label of the prediction.
+        weight (torch.Tensor, optional): Sample-wise loss weight.
+        reduction (str): The method used to reduce the loss.
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+
+    Returns:
+        torch.Tensor: The calculated loss
+    """
+    # Unreduced loss first; weighting and reduction are applied afterwards.
+    per_sample = F.cross_entropy(pred, label, reduction='none')
+    sample_weight = None if weight is None else weight.float()
+    return weight_reduce_loss(
+        per_sample,
+        weight=sample_weight,
+        reduction=reduction,
+        avg_factor=avg_factor)
+
+
+def soft_cross_entropy(pred,
+                       label,
+                       weight=None,
+                       reduction='mean',
+                       avg_factor=None):
+    """Calculate the Soft CrossEntropy loss. The label can be float.
+
+    Args:
+        pred (torch.Tensor): The prediction with shape (N, C), C is the number
+            of classes.
+        label (torch.Tensor): The gt label of the prediction with shape (N, C).
+            When using "mixup", the label can be float.
+        weight (torch.Tensor, optional): Sample-wise loss weight.
+        reduction (str): The method used to reduce the loss.
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+
+    Returns:
+        torch.Tensor: The calculated loss
+    """
+    # Label-weighted negative log-probabilities, summed over the last
+    # (class) dimension.
+    log_probs = F.log_softmax(pred, dim=-1)
+    per_sample = (-label * log_probs).sum(dim=-1)
+
+    sample_weight = None if weight is None else weight.float()
+    return weight_reduce_loss(
+        per_sample,
+        weight=sample_weight,
+        reduction=reduction,
+        avg_factor=avg_factor)
+
+
+def binary_cross_entropy(pred,
+                         label,
+                         weight=None,
+                         reduction='mean',
+                         avg_factor=None):
+    """Calculate the binary CrossEntropy loss with logits.
+
+    Args:
+        pred (torch.Tensor): The prediction (logits) with shape (N, *).
+        label (torch.Tensor): The gt label with shape (N, *). Integer or
+            boolean labels are cast to the dtype of ``pred``.
+        weight (torch.Tensor, optional): Sample-wise weight of loss with shape
+             (N, ). Defaults to None.
+        reduction (str): The method used to reduce the loss.
+            Options are "none", "mean" and "sum". If reduction is 'none' , loss
+             is same shape as pred and label. Defaults to 'mean'.
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+
+    Returns:
+        torch.Tensor: The calculated loss
+    """
+    assert pred.dim() == label.dim()
+
+    # ``binary_cross_entropy_with_logits`` needs a floating-point target;
+    # cast integer/bool multi-hot labels the same way the focal and
+    # asymmetric losses cast their targets. Floating labels are untouched.
+    if not label.is_floating_point():
+        label = label.type_as(pred)
+
+    loss = F.binary_cross_entropy_with_logits(pred, label, reduction='none')
+
+    # apply weights and do the reduction
+    if weight is not None:
+        assert weight.dim() == 1
+        weight = weight.float()
+        if pred.dim() > 1:
+            # (N,) -> (N, 1) so the weight broadcasts over the class dim.
+            weight = weight.reshape(-1, 1)
+    loss = weight_reduce_loss(
+        loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
+    return loss
+
+
+@LOSSES.register_module()
+class CrossEntropyLoss(nn.Module):
+    """Cross entropy loss.
+
+    Args:
+        use_sigmoid (bool): Whether the prediction uses sigmoid
+            instead of softmax. Defaults to False.
+        use_soft (bool): Whether to use the soft version of CrossEntropyLoss.
+            Defaults to False.
+        reduction (str): The method used to reduce the loss.
+            Options are "none", "mean" and "sum". Defaults to 'mean'.
+        loss_weight (float):  Weight of the loss. Defaults to 1.0.
+    """
+
+    def __init__(self,
+                 use_sigmoid=False,
+                 use_soft=False,
+                 reduction='mean',
+                 loss_weight=1.0):
+        super().__init__()
+        self.use_sigmoid = use_sigmoid
+        self.use_soft = use_soft
+        assert not self.use_soft or not self.use_sigmoid, \
+            'use_sigmoid and use_soft could not be set simultaneously'
+
+        self.reduction = reduction
+        self.loss_weight = loss_weight
+
+        # Start from the plain criterion and override it for the special
+        # modes; sigmoid is applied last so it keeps precedence.
+        self.cls_criterion = cross_entropy
+        if self.use_soft:
+            self.cls_criterion = soft_cross_entropy
+        if self.use_sigmoid:
+            self.cls_criterion = binary_cross_entropy
+
+    def forward(self,
+                cls_score,
+                label,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None,
+                **kwargs):
+        """Compute the weighted loss with the configured criterion.
+
+        ``reduction_override``, when given, replaces the configured
+        reduction for this call; extra keyword arguments are forwarded to
+        the criterion.
+        """
+        assert reduction_override in (None, 'none', 'mean', 'sum')
+        reduction = reduction_override or self.reduction
+        unscaled = self.cls_criterion(
+            cls_score,
+            label,
+            weight,
+            reduction=reduction,
+            avg_factor=avg_factor,
+            **kwargs)
+        return self.loss_weight * unscaled
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/focal_loss.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/focal_loss.py
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import LOSSES
+from .utils import weight_reduce_loss
+
+
+def sigmoid_focal_loss(pred,
+                       target,
+                       weight=None,
+                       gamma=2.0,
+                       alpha=0.25,
+                       reduction='mean',
+                       avg_factor=None):
+    """Sigmoid focal loss.
+
+    Args:
+        pred (torch.Tensor): The prediction (logits) with shape (N, *).
+        target (torch.Tensor): The ground truth label of the prediction with
+            shape (N, *).
+        weight (torch.Tensor, optional): Sample-wise loss weight with shape
+            (N, ). Defaults to None.
+        gamma (float): The gamma for calculating the modulating factor.
+            Defaults to 2.0.
+        alpha (float): A balanced form for Focal Loss. Defaults to 0.25.
+        reduction (str): The method used to reduce the loss.
+            Options are "none", "mean" and "sum". If reduction is 'none' ,
+            loss is same shape as pred and label. Defaults to 'mean'.
+        avg_factor (int, optional): Average factor that is used to average
+            the loss. Defaults to None.
+
+    Returns:
+        torch.Tensor: Loss.
+    """
+    assert pred.shape == target.shape, \
+        'pred and target should be in the same shape.'
+
+    prob = pred.sigmoid()
+    target = target.type_as(pred)
+    # pt is the probability mass assigned to the wrong side of each element.
+    pt = (1 - prob) * target + prob * (1 - target)
+    class_balance = alpha * target + (1 - alpha) * (1 - target)
+    focal_weight = class_balance * pt.pow(gamma)
+    bce = F.binary_cross_entropy_with_logits(pred, target, reduction='none')
+    loss = bce * focal_weight
+
+    if weight is not None:
+        assert weight.dim() == 1
+        weight = weight.float()
+        # Turn (N,) into (N, 1) so it broadcasts over the class dimension.
+        weight = weight.reshape(-1, 1) if pred.dim() > 1 else weight
+    return weight_reduce_loss(loss, weight, reduction, avg_factor)
+
+
+@LOSSES.register_module()
+class FocalLoss(nn.Module):
+    """Focal loss.
+
+    Args:
+        gamma (float): Focusing parameter in focal loss.
+            Defaults to 2.0.
+        alpha (float): The parameter in balanced form of focal
+            loss. Defaults to 0.25.
+        reduction (str): The method used to reduce the loss into
+            a scalar. Options are "none", "mean" and "sum".
+            Defaults to 'mean'.
+        loss_weight (float): Weight of loss. Defaults to 1.0.
+    """
+
+    def __init__(self,
+                 gamma=2.0,
+                 alpha=0.25,
+                 reduction='mean',
+                 loss_weight=1.0):
+        super().__init__()
+        self.gamma, self.alpha = gamma, alpha
+        self.reduction = reduction
+        self.loss_weight = loss_weight
+
+    def forward(self,
+                pred,
+                target,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None):
+        """Sigmoid focal loss.
+
+        Args:
+            pred (torch.Tensor): The prediction with shape (N, *).
+            target (torch.Tensor): The ground truth label of the prediction
+                with shape (N, *).
+            weight (torch.Tensor, optional): Sample-wise loss weight with
+                shape (N, ). Defaults to None.
+            avg_factor (int, optional): Average factor that is used to
+                average the loss. Defaults to None.
+            reduction_override (str, optional): The method used to reduce the
+                loss into a scalar. Options are "none", "mean" and "sum".
+                Defaults to None.
+
+        Returns:
+            torch.Tensor: Loss.
+        """
+        assert reduction_override in (None, 'none', 'mean', 'sum')
+        # Any accepted non-None override is a non-empty string, so ``or``
+        # falls back to the configured reduction only for None.
+        reduction = reduction_override or self.reduction
+        unscaled = sigmoid_focal_loss(
+            pred,
+            target,
+            weight,
+            gamma=self.gamma,
+            alpha=self.alpha,
+            reduction=reduction,
+            avg_factor=avg_factor)
+        return self.loss_weight * unscaled
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/label_smooth_loss.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/label_smooth_loss.py
+import warnings
+
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+from .cross_entropy_loss import CrossEntropyLoss
+from .utils import convert_to_one_hot
+
+
+@LOSSES.register_module()
+class LabelSmoothLoss(nn.Module):
+    r"""Initializer for the label smoothed cross entropy loss.
+
+    Refers to `Rethinking the Inception Architecture for Computer Vision
+    <https://arxiv.org/abs/1512.00567>`_.
+
+    This decreases gap between output scores and encourages generalization.
+    Labels provided to forward can be one-hot like vectors (NxC) or class
+    indices (Nx1).
+    And this accepts linear combination of one-hot like labels from mixup or
+    cutmix except multi-label task.
+
+    Args:
+        label_smooth_val (float): The degree of label smoothing, in [0, 1).
+        num_classes (int, optional): Number of classes. If None, it is taken
+            from ``cls_score.shape[1]`` on the first forward call.
+            Defaults to None.
+        mode (str, optional): Refers to notes, Options are 'original',
+            'classy_vision', 'multi_label'. If None, a warning is issued and
+            'classy_vision' is used. Defaults to None.
+        reduction (str): The method used to reduce the loss.
+            Options are "none", "mean" and "sum". Defaults to 'mean'.
+        loss_weight (float):  Weight of the loss. Defaults to 1.0.
+
+    Notes:
+        if the mode is "original", this will use the same label smooth method
+        as the original paper as:
+
+        .. math::
+
+            (1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K}
+
+        where epsilon is the `label_smooth_val`, K is the num_classes and
+        delta(k,y) is Dirac delta, which equals 1 for k=y and 0 otherwise.
+
+        if the mode is "classy_vision", this will use the same label smooth
+        method as the facebookresearch/ClassyVision repo as:
+
+        .. math::
+
+            \frac{\delta_{k, y} + \epsilon/K}{1+\epsilon}
+
+        if the mode is "multi_label", this will accept labels from multi-label
+        task and smoothing them as:
+
+        .. math::
+
+            (1-2\epsilon)\delta_{k, y} + \epsilon
+    """
+
+    def __init__(self,
+                 label_smooth_val,
+                 num_classes=None,
+                 mode=None,
+                 reduction='mean',
+                 loss_weight=1.0):
+        super().__init__()
+        self.num_classes = num_classes
+        self.loss_weight = loss_weight
+
+        assert (isinstance(label_smooth_val, float)
+                and 0 <= label_smooth_val < 1), \
+            f'LabelSmoothLoss accepts a float label_smooth_val ' \
+            f'over [0, 1), but gets {label_smooth_val}'
+        self.label_smooth_val = label_smooth_val
+
+        accept_reduction = {'none', 'mean', 'sum'}
+        # Report the offending reduction (the message used to print ``mode``).
+        assert reduction in accept_reduction, \
+            f'LabelSmoothLoss supports reduction {accept_reduction}, ' \
+            f'but gets {reduction}.'
+        self.reduction = reduction
+
+        if mode is None:
+            warnings.warn(
+                'LabelSmoothLoss mode is not set, use "classy_vision" '
+                'by default. The default value will be changed to '
+                '"original" recently. Please set mode manually if want '
+                'to keep "classy_vision".', UserWarning)
+            mode = 'classy_vision'
+
+        accept_mode = {'original', 'classy_vision', 'multi_label'}
+        assert mode in accept_mode, \
+            f'LabelSmoothLoss supports mode {accept_mode}, but gets {mode}.'
+        self.mode = mode
+
+        self._eps = label_smooth_val
+        if mode == 'classy_vision':
+            # (delta + eps/K) / (1 + eps) equals the "original" formula with
+            # eps' = eps / (1 + eps).
+            self._eps = label_smooth_val / (1 + label_smooth_val)
+        # The configured reduction must reach the inner criterion, otherwise
+        # it would always fall back to that criterion's default ('mean').
+        if mode == 'multi_label':
+            self.ce = CrossEntropyLoss(use_sigmoid=True, reduction=reduction)
+            self.smooth_label = self.multilabel_smooth_label
+        else:
+            self.ce = CrossEntropyLoss(use_soft=True, reduction=reduction)
+            self.smooth_label = self.original_smooth_label
+
+    def generate_one_hot_like_label(self, label):
+        """This function takes one-hot or index label vectors and computes one-
+        hot like label vectors (float)"""
+        # check if targets are inputted as class integers
+        if label.dim() == 1 or (label.dim() == 2 and label.shape[1] == 1):
+            label = convert_to_one_hot(label.view(-1, 1), self.num_classes)
+        return label.float()
+
+    def original_smooth_label(self, one_hot_like_label):
+        """Smooth labels as ``(1 - eps) * label + eps / num_classes``."""
+        assert self.num_classes > 0
+        smooth_label = one_hot_like_label * (1 - self._eps)
+        smooth_label += self._eps / self.num_classes
+        return smooth_label
+
+    def multilabel_smooth_label(self, one_hot_like_label):
+        """Set positive entries to ``1 - eps`` and all others to ``eps``."""
+        assert self.num_classes > 0
+        smooth_label = torch.full_like(one_hot_like_label, self._eps)
+        smooth_label.masked_fill_(one_hot_like_label > 0, 1 - self._eps)
+        return smooth_label
+
+    def forward(self,
+                cls_score,
+                label,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None,
+                **kwargs):
+        """Compute the label-smoothed loss.
+
+        Args:
+            cls_score (torch.Tensor): Class scores; ``shape[1]`` is the
+                number of classes.
+            label (torch.Tensor): Class indices or one-hot like labels.
+            weight (torch.Tensor, optional): Loss weight forwarded to the
+                inner criterion. Defaults to None.
+            avg_factor (int, optional): Average factor forwarded to the
+                inner criterion. Defaults to None.
+            reduction_override (str, optional): Reduction used for this call
+                instead of the configured one. Defaults to None.
+
+        Returns:
+            torch.Tensor: The loss scaled by ``loss_weight``.
+        """
+        if self.num_classes is not None:
+            assert self.num_classes == cls_score.shape[1], \
+                f'num_classes should equal to cls_score.shape[1], ' \
+                f'but got num_classes: {self.num_classes} and ' \
+                f'cls_score.shape[1]: {cls_score.shape[1]}'
+        else:
+            # Remember the class count seen on the first call.
+            self.num_classes = cls_score.shape[1]
+
+        one_hot_like_label = self.generate_one_hot_like_label(label=label)
+        assert one_hot_like_label.shape == cls_score.shape, \
+            f'LabelSmoothLoss requires output and target ' \
+            f'to be same shape, but got output.shape: {cls_score.shape} ' \
+            f'and target.shape: {one_hot_like_label.shape}'
+
+        smoothed_label = self.smooth_label(one_hot_like_label)
+        # Apply the configured loss weight, which was previously stored but
+        # never used.
+        return self.loss_weight * self.ce.forward(
+            cls_score,
+            smoothed_label,
+            weight=weight,
+            avg_factor=avg_factor,
+            reduction_override=reduction_override,
+            **kwargs)
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/utils.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/losses/utils.py
+import functools
+
+import torch
+import torch.nn.functional as F
+
+
+def reduce_loss(loss, reduction):
+    """Reduce loss as specified.
+
+    Args:
+        loss (Tensor): Elementwise loss tensor.
+        reduction (str): Options are "none", "mean" and "sum".
+
+    Return:
+        Tensor: Reduced loss tensor.
+    """
+    # Codes compared below: 0 -> none, 1 -> mean, 2 -> sum.
+    code = F._Reduction.get_enum(reduction)
+    if code == 1:
+        return loss.mean()
+    if code == 2:
+        return loss.sum()
+    if code == 0:
+        return loss
+
+
+def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
+    """Apply element-wise weight and reduce loss.
+
+    Args:
+        loss (Tensor): Element-wise loss.
+        weight (Tensor): Element-wise weights.
+        reduction (str): Same as built-in losses of PyTorch.
+        avg_factor (float): Average factor when computing the mean of losses.
+
+    Returns:
+        Tensor: Processed loss values.
+
+    Raises:
+        ValueError: If ``avg_factor`` is given together with a reduction
+            other than "mean" or "none".
+    """
+    weighted = loss if weight is None else loss * weight
+
+    # Without an explicit average factor, use the plain reduction.
+    if avg_factor is None:
+        return reduce_loss(weighted, reduction)
+
+    # With an average factor, "mean" divides the sum by it and "none" leaves
+    # the element-wise loss untouched; anything else is rejected.
+    if reduction == 'mean':
+        return weighted.sum() / avg_factor
+    if reduction == 'none':
+        return weighted
+    raise ValueError('avg_factor can not be used with reduction="sum"')
+
+
+def weighted_loss(loss_func):
+    """Create a weighted version of a given loss function.
+
+    The wrapped function must have a signature like
+    `loss_func(pred, target, **kwargs)` and only compute the element-wise
+    loss without any reduction. The returned function adds the weight and
+    reduction arguments, giving a signature like
+    `loss_func(pred, target, weight=None, reduction='mean',
+    avg_factor=None, **kwargs)`.
+
+    :Example:
+
+    >>> import torch
+    >>> @weighted_loss
+    ... def l1_loss(pred, target):
+    ...     return (pred - target).abs()
+
+    >>> pred = torch.Tensor([0, 2, 3])
+    >>> target = torch.Tensor([1, 1, 1])
+    >>> weight = torch.Tensor([1, 0, 1])
+
+    >>> l1_loss(pred, target)
+    tensor(1.3333)
+    >>> l1_loss(pred, target, weight)
+    tensor(1.)
+    >>> l1_loss(pred, target, reduction='none')
+    tensor([1., 1., 2.])
+    >>> l1_loss(pred, target, weight, avg_factor=2)
+    tensor(1.5000)
+    """
+
+    @functools.wraps(loss_func)
+    def weighted(pred,
+                 target,
+                 weight=None,
+                 reduction='mean',
+                 avg_factor=None,
+                 **kwargs):
+        # Element-wise loss first, then weighting and reduction.
+        elementwise = loss_func(pred, target, **kwargs)
+        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)
+
+    return weighted
+
+
+def convert_to_one_hot(targets: torch.Tensor, classes) -> torch.Tensor:
+    """Convert target class indices to one-hot vectors, given the number of
+    classes.
+
+    Args:
+        targets (Tensor): The ground truth label of the prediction
+                with shape (N, 1)
+        classes (int): the number of classes.
+
+    Returns:
+        Tensor: One-hot targets of dtype ``torch.long`` with shape
+            (N, classes), on the same device as ``targets``.
+    """
+    largest_index = torch.max(targets).item()
+    assert (largest_index <
+            classes), 'Class Index must be less than number of classes'
+
+    num_samples = targets.shape[0]
+    one_hot = torch.zeros((num_samples, classes),
+                          dtype=torch.long,
+                          device=targets.device)
+    # Write a 1 at the class column of every row.
+    one_hot.scatter_(1, targets.long(), 1)
+    return one_hot
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/necks/__init__.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/necks/__init__.py
+from .gap import GlobalAveragePooling
+
+# Public API of the necks package: only the global-average-pooling neck.
+__all__ = ['GlobalAveragePooling']
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/necks/gap.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/necks/gap.py
+import torch
+import torch.nn as nn
+
+from ..builder import NECKS
+
+
+@NECKS.register_module()
+class GlobalAveragePooling(nn.Module):
+    """Global Average Pooling neck.
+
+    Note that we use `view` to remove extra channel after pooling. We do not
+    use `squeeze` as it will also remove the batch dimension when the tensor
+    has a batch dimension of size 1, which can lead to unexpected errors.
+
+    Args:
+        dim (int): Dimensions of each sample channel, can be one of {1, 2, 3}.
+            Default: 2
+    """
+
+    def __init__(self, dim=2):
+        super().__init__()
+        assert dim in (1, 2, 3), 'GlobalAveragePooling dim only support ' \
+            f'{1, 2, 3}, get {dim} instead.'
+        # Pick the adaptive pooling layer matching the spatial rank.
+        if dim == 1:
+            pool = nn.AdaptiveAvgPool1d(1)
+        elif dim == 2:
+            pool = nn.AdaptiveAvgPool2d((1, 1))
+        else:
+            pool = nn.AdaptiveAvgPool3d((1, 1, 1))
+        self.gap = pool
+
+    def init_weights(self):
+        """No-op: this method does nothing."""
+        pass
+
+    def _pool_flat(self, feat):
+        """Pool one feature map and flatten it to (batch, -1)."""
+        return self.gap(feat).view(feat.size(0), -1)
+
+    def forward(self, inputs):
+        """Pool a tensor, or every tensor of a tuple, to flat features.
+
+        Raises:
+            TypeError: If ``inputs`` is neither a tuple nor a tensor.
+        """
+        if isinstance(inputs, tuple):
+            return tuple(self._pool_flat(x) for x in inputs)
+        if isinstance(inputs, torch.Tensor):
+            return self._pool_flat(inputs)
+        raise TypeError('neck inputs should be tuple or torch.tensor')
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/__init__.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/__init__.py
+from .augment.augments import Augments
+from .channel_shuffle import channel_shuffle
+from .helpers import to_2tuple, to_3tuple, to_4tuple, to_ntuple
+from .inverted_residual import InvertedResidual
+from .make_divisible import make_divisible
+from .se_layer import SELayer
+
+__all__ = [
+    'channel_shuffle', 'make_divisible', 'InvertedResidual', 'SELayer',
+    'to_ntuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'Augments'
+]
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/__init__.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/__init__.py
+from .augments import Augments
+from .cutmix import BatchCutMixLayer
+from .identity import Identity
+from .mixup import BatchMixupLayer
+
+__all__ = ['Augments', 'BatchCutMixLayer', 'Identity', 'BatchMixupLayer']
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/augments.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/augments.py
+import random
+
+import numpy as np
+
+from .builder import build_augment
+
+
+class Augments(object):
+    """Randomly apply one of several batch augmentations.
+
+    Every entry of ``augments_cfg`` is built through ``build_augment`` and
+    must expose a ``prob`` attribute. On each call exactly one augmentation
+    is drawn according to these probabilities and applied to the batch.
+
+    If no ``Identity`` entry is configured and the probabilities sum to less
+    than 1, an ``Identity`` augmentation is appended with the remaining
+    probability (its ``num_classes`` is taken from the first augmentation).
+
+    Args:
+        augments_cfg (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict`):
+            Config dict of augments
+
+    Example:
+        >>> augments_cfg = [
+        ...     dict(type='BatchCutMix', alpha=1., num_classes=10, prob=0.5),
+        ...     dict(type='BatchMixup', alpha=1., num_classes=10, prob=0.3)
+        ... ]
+        >>> augments = Augments(augments_cfg)
+        >>> imgs = torch.randn(16, 3, 32, 32)
+        >>> label = torch.randint(0, 10, (16, ))
+        >>> imgs, label = augments(imgs, label)
+
+    In the example above BatchCutMix is chosen with probability 0.5 and
+    BatchMixup with 0.3. As Identity is not in augments_cfg, Identity is
+    used with probability 1 - 0.5 - 0.3 = 0.2.
+    """
+
+    # Absolute tolerance when comparing the probability sum against 1.
+    # Exact float comparison rejects valid configs such as 0.1 + 0.2 + 0.7,
+    # which sums to 0.9999999999999999 in binary floating point.
+    _PROB_ATOL = 1e-8
+
+    def __init__(self, augments_cfg):
+        super(Augments, self).__init__()
+
+        if isinstance(augments_cfg, dict):
+            augments_cfg = [augments_cfg]
+
+        assert len(augments_cfg) > 0, \
+            'The length of augments_cfg should be positive.'
+        self.augments = [build_augment(cfg) for cfg in augments_cfg]
+        self.augment_probs = [aug.prob for aug in self.augments]
+        total_prob = sum(self.augment_probs)
+
+        has_identity = any(cfg['type'] == 'Identity' for cfg in augments_cfg)
+        if has_identity:
+            # With an explicit Identity the user must cover the whole mass.
+            assert abs(total_prob - 1.0) <= self._PROB_ATOL,\
+                'The sum of augmentation probabilities should equal to 1,' \
+                ' but got {:.2f}'.format(total_prob)
+        else:
+            assert total_prob <= 1.0 + self._PROB_ATOL,\
+                'The sum of augmentation probabilities should less than or ' \
+                'equal to 1, but got {:.2f}'.format(total_prob)
+            # Whatever probability is left over goes to an implicit Identity.
+            identity_prob = 1 - total_prob
+            if identity_prob > 0:
+                num_classes = self.augments[0].num_classes
+                self.augments += [
+                    build_augment(
+                        dict(
+                            type='Identity',
+                            num_classes=num_classes,
+                            prob=identity_prob))
+                ]
+                self.augment_probs += [identity_prob]
+
+    def __call__(self, img, gt_label):
+        """Draw one augmentation and apply it to ``(img, gt_label)``.
+
+        Returns:
+            tuple: Whatever the selected augmentation returns for
+            ``(img, gt_label)``; the inputs unchanged if no augmentation
+            is registered.
+        """
+        if self.augments:
+            # Seed a private NumPy generator from Python's ``random`` module
+            # so the draw follows that module's state.
+            random_state = np.random.RandomState(random.randint(0, 2**32 - 1))
+            aug = random_state.choice(self.augments, p=self.augment_probs)
+            return aug(img, gt_label)
+        return img, gt_label
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/builder.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/builder.py
+from mmcv.utils import Registry, build_from_cfg
+
+AUGMENT = Registry('augment')
+
+
+def build_augment(cfg, default_args=None):
+    """Build an augmentation object from ``cfg`` using the AUGMENT registry.
+
+    Args:
+        cfg (dict): Config forwarded to ``build_from_cfg``.
+        default_args (dict, optional): Forwarded unchanged to
+            ``build_from_cfg``. Default: None
+
+    Returns:
+        object: Whatever ``build_from_cfg`` constructs for ``cfg``.
+    """
+    augment = build_from_cfg(cfg, AUGMENT, default_args)
+    return augment
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/cutmix.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/cutmix.py
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from .builder import AUGMENT
+
+
+class BaseCutMixLayer(object, metaclass=ABCMeta):
+    """Abstract base holding the CutMix parameters and bbox sampling helpers.
+
+    Args:
+        alpha (float): Parameter of the Beta distribution. Must be > 0.
+        num_classes (int): The number of classes.
+        prob (float): Probability attached to this layer. It should be in
+            range [0, 1]. Default to 1.0
+        cutmix_minmax (List[float], optional): Min/max size of the cut box
+            as a fraction of the image size. When given, boxes are sampled
+            from this range instead of being derived from lambda.
+        correct_lam (bool): Whether to recompute lambda from the actual box
+            area (which may shrink when the box is clipped at the image
+            border). Default to True
+    """
+
+    def __init__(self,
+                 alpha,
+                 num_classes,
+                 prob=1.0,
+                 cutmix_minmax=None,
+                 correct_lam=True):
+        super(BaseCutMixLayer, self).__init__()
+
+        assert isinstance(alpha, float)
+        assert alpha > 0
+        assert isinstance(num_classes, int)
+        assert isinstance(prob, float)
+        assert 0.0 <= prob <= 1.0
+
+        self.alpha, self.num_classes, self.prob = alpha, num_classes, prob
+        self.cutmix_minmax = cutmix_minmax
+        self.correct_lam = correct_lam
+
+    def rand_bbox_minmax(self, img_shape, count=None):
+        """Sample a box whose sides are drawn from the min/max ratio range.
+
+        Inspired by the Darknet cutmix implementation. Typical values are
+        around .2-.3 for the minimum and .8-.9 for the maximum.
+
+        Args:
+            img_shape (tuple): Image shape; the last two entries are used
+                as height and width.
+            count (int, optional): Number of bbox to generate. Default to None
+
+        Returns:
+            tuple: ``(yl, yu, xl, xu)`` box bounds.
+        """
+        assert len(self.cutmix_minmax) == 2
+        min_ratio = self.cutmix_minmax[0]
+        max_ratio = self.cutmix_minmax[1]
+        height, width = img_shape[-2:]
+        # Keep the draw order (box height, box width, top, left) so a seeded
+        # run yields the same boxes.
+        box_h = np.random.randint(
+            int(height * min_ratio), int(height * max_ratio), size=count)
+        box_w = np.random.randint(
+            int(width * min_ratio), int(width * max_ratio), size=count)
+        top = np.random.randint(0, height - box_h, size=count)
+        left = np.random.randint(0, width - box_w, size=count)
+        return top, top + box_h, left, left + box_w
+
+    def rand_bbox(self, img_shape, lam, margin=0., count=None):
+        """Sample the standard CutMix box whose size follows from lambda.
+
+        The box is centred on a random point and clipped to the image.
+
+        Args:
+            img_shape (tuple): Image shape; the last two entries are used
+                as height and width.
+            lam (float): Cutmix lambda value
+            margin (float): Fraction of the box size kept free at the image
+                border when drawing the centre. Default to 0.
+            count (int, optional): Number of bbox to generate. Default to None
+
+        Returns:
+            tuple: ``(yl, yh, xl, xh)`` box bounds.
+        """
+        height, width = img_shape[-2:]
+        side_ratio = np.sqrt(1 - lam)
+        box_h = int(height * side_ratio)
+        box_w = int(width * side_ratio)
+        pad_y = int(margin * box_h)
+        pad_x = int(margin * box_w)
+        center_y = np.random.randint(pad_y, height - pad_y, size=count)
+        center_x = np.random.randint(pad_x, width - pad_x, size=count)
+        half_h = box_h // 2
+        half_w = box_w // 2
+        top = np.clip(center_y - half_h, 0, height)
+        bottom = np.clip(center_y + half_h, 0, height)
+        left = np.clip(center_x - half_w, 0, width)
+        right = np.clip(center_x + half_w, 0, width)
+        return top, bottom, left, right
+
+    def cutmix_bbox_and_lam(self, img_shape, lam, count=None):
+        """Sample a box and, if requested, recompute lambda from its area.
+
+        Args:
+            img_shape (tuple): Image shape as tuple
+            lam (float): Cutmix lambda value
+            count (int, optional): Number of bbox to generate. Default to None
+
+        Returns:
+            tuple: ``((yl, yu, xl, xu), lam)``.
+        """
+        use_minmax = self.cutmix_minmax is not None
+        if use_minmax:
+            bounds = self.rand_bbox_minmax(img_shape, count=count)
+        else:
+            bounds = self.rand_bbox(img_shape, lam, count=count)
+        yl, yu, xl, xu = bounds
+        if self.correct_lam or use_minmax:
+            # Lambda becomes the fraction of the image left untouched.
+            box_area = (yu - yl) * (xu - xl)
+            lam = 1. - box_area / float(img_shape[-2] * img_shape[-1])
+        return (yl, yu, xl, xu), lam
+
+    @abstractmethod
+    def cutmix(self, imgs, gt_label):
+        """Apply CutMix to a batch; implemented by subclasses."""
+        pass
+
+
+@AUGMENT.register_module(name='BatchCutMix')
+class BatchCutMixLayer(BaseCutMixLayer):
+    """CutMix applied once per batch with a single shared box."""
+
+    def __init__(self, *args, **kwargs):
+        super(BatchCutMixLayer, self).__init__(*args, **kwargs)
+
+    def cutmix(self, img, gt_label):
+        """Paste one box from a shuffled copy of the batch and mix the labels.
+
+        ``img`` is modified in place and also returned.
+
+        Args:
+            img (torch.Tensor): Batch of images, indexed as
+                ``[batch, channel, y, x]``.
+            gt_label (torch.Tensor): Integer class labels.
+
+        Returns:
+            tuple: ``(img, mixed_gt_label)`` with soft one-hot labels.
+        """
+        targets = F.one_hot(gt_label, num_classes=self.num_classes)
+        lam = np.random.beta(self.alpha, self.alpha)
+        perm = torch.randperm(img.size(0))
+
+        bbox, lam = self.cutmix_bbox_and_lam(img.shape, lam)
+        top, bottom, left, right = bbox
+        # The right-hand side is gathered by ``perm`` before it is written.
+        patch = img[perm, :, top:bottom, left:right]
+        img[:, :, top:bottom, left:right] = patch
+        mixed_gt_label = lam * targets + (1 - lam) * targets[perm, :]
+        return img, mixed_gt_label
+
+    def __call__(self, img, gt_label):
+        """Alias for :meth:`cutmix`."""
+        return self.cutmix(img, gt_label)
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/identity.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/identity.py
+import torch.nn.functional as F
+
+from .builder import AUGMENT
+
+
+@AUGMENT.register_module(name='Identity')
+class Identity(object):
+    """Pass images through unchanged and one-hot encode the labels.
+
+    Args:
+        num_classes (int): The number of classes.
+        prob (float): Probability attached to this augmentation. It should
+            be in range [0, 1]. Default to 1.0
+    """
+
+    def __init__(self, num_classes, prob=1.0):
+        super(Identity, self).__init__()
+
+        assert isinstance(num_classes, int)
+        assert isinstance(prob, float)
+        assert 0.0 <= prob <= 1.0
+
+        self.num_classes, self.prob = num_classes, prob
+
+    def one_hot(self, gt_label):
+        """Return ``gt_label`` one-hot encoded over ``num_classes`` classes."""
+        encoded = F.one_hot(gt_label, num_classes=self.num_classes)
+        return encoded
+
+    def __call__(self, img, gt_label):
+        """Return ``img`` untouched together with the one-hot labels."""
+        one_hot_label = self.one_hot(gt_label)
+        return img, one_hot_label
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/mixup.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/augment/mixup.py
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from .builder import AUGMENT
+
+
+class BaseMixupLayer(object, metaclass=ABCMeta):
+    """Abstract base that validates and stores the Mixup parameters.
+
+    Args:
+        alpha (float): Parameter of the Beta distribution. Must be > 0.
+        num_classes (int): The number of classes.
+        prob (float): Probability attached to this layer. It should be in
+            range [0, 1]. Default to 1.0
+    """
+
+    def __init__(self, alpha, num_classes, prob=1.0):
+        super(BaseMixupLayer, self).__init__()
+
+        assert isinstance(alpha, float)
+        assert alpha > 0
+        assert isinstance(num_classes, int)
+        assert isinstance(prob, float)
+        assert 0.0 <= prob <= 1.0
+
+        self.alpha, self.num_classes, self.prob = alpha, num_classes, prob
+
+    @abstractmethod
+    def mixup(self, imgs, gt_label):
+        """Apply Mixup to a batch; implemented by subclasses."""
+        pass
+
+
+@AUGMENT.register_module(name='BatchMixup')
+class BatchMixupLayer(BaseMixupLayer):
+    """Mixup applied once per batch with a single shared lambda."""
+
+    def __init__(self, *args, **kwargs):
+        super(BatchMixupLayer, self).__init__(*args, **kwargs)
+
+    def mixup(self, img, gt_label):
+        """Blend the batch with a shuffled copy of itself.
+
+        Args:
+            img (torch.Tensor): Batch of images; the first dimension is the
+                batch dimension.
+            gt_label (torch.Tensor): Integer class labels.
+
+        Returns:
+            tuple: ``(mixed_img, mixed_gt_label)`` where both are the same
+            convex combination of the original and shuffled batch.
+        """
+        targets = F.one_hot(gt_label, num_classes=self.num_classes)
+        lam = np.random.beta(self.alpha, self.alpha)
+        perm = torch.randperm(img.size(0))
+
+        shuffled_img = img[perm, :]
+        shuffled_targets = targets[perm, :]
+        mixed_img = lam * img + (1 - lam) * shuffled_img
+        mixed_gt_label = lam * targets + (1 - lam) * shuffled_targets
+
+        return mixed_img, mixed_gt_label
+
+    def __call__(self, img, gt_label):
+        """Alias for :meth:`mixup`."""
+        return self.mixup(img, gt_label)
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/channel_shuffle.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/channel_shuffle.py
+import torch
+
+
+def channel_shuffle(x, groups):
+    """Interleave the channels of ``x`` across ``groups`` groups.
+
+    The channel axis is split into ``(groups, channels_per_group)``, the two
+    axes are swapped and the result is flattened back, so that information
+    can flow between the groups of subsequent grouped convolutions.
+
+    Args:
+        x (Tensor): The input tensor with four dimensions
+            ``(batch, channels, height, width)``.
+        groups (int): The number of groups to divide the input tensor
+            in the channel dimension.
+
+    Returns:
+        Tensor: The output tensor after channel shuffle operation.
+    """
+
+    n, c, h, w = x.size()
+    assert (c % groups == 0), ('num_channels should be '
+                               'divisible by groups')
+
+    grouped = x.view(n, groups, c // groups, h, w)
+    # Swapping the two channel axes makes the memory non-contiguous, so a
+    # copy is required before the final ``view``.
+    interleaved = grouped.transpose(1, 2).contiguous()
+    return interleaved.view(n, -1, h, w)
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/helpers.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/helpers.py
+import collections.abc
+from itertools import repeat
+
+
+# From PyTorch internals
+def _ntuple(n):
+    """Return a converter that expands a scalar into an ``n``-tuple.
+
+    The returned function hands back any iterable argument (including
+    strings) unchanged and repeats everything else ``n`` times in a tuple.
+    """
+
+    def parse(x):
+        already_iterable = isinstance(x, collections.abc.Iterable)
+        return x if already_iterable else tuple(repeat(x, n))
+
+    return parse
+
+
+# Ready-made converters for the most common lengths.
+to_1tuple = _ntuple(1)
+to_2tuple = _ntuple(2)
+to_3tuple = _ntuple(3)
+to_4tuple = _ntuple(4)
+to_ntuple = _ntuple
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/inverted_residual.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/inverted_residual.py
+import torch.utils.checkpoint as cp
+from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
+
+from .se_layer import SELayer
+
+
+# class InvertedResidual(nn.Module):
+class InvertedResidual(BaseModule):
+    """Inverted Residual Block.
+
+    The block chains an optional 1x1 expand conv, a depthwise conv, an
+    optional SE layer and a 1x1 linear conv. The input is added back to the
+    output when ``stride == 1`` and ``in_channels == out_channels``.
+
+    Args:
+        in_channels (int): The input channels of this Module.
+        out_channels (int): The output channels of this Module.
+        mid_channels (int): The input channels of the depthwise convolution.
+        kernel_size (int): The kernel size of the depthwise convolution.
+            Default: 3.
+        stride (int): The stride of the depthwise convolution; must be 1
+            or 2. Default: 1.
+        se_cfg (dict): Config dict for se layer. Default: None, which means no
+            se layer.
+        with_expand_conv (bool): Use expand conv or not. If set False,
+            mid_channels must be the same with in_channels.
+            Default: True.
+        conv_cfg (dict): Config dict for convolution layer. Default: None,
+            which means using conv2d.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU').
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+        init_cfg (dict, optional): Passed unchanged to the ``BaseModule``
+            constructor. Default: None.
+
+    Returns:
+        Tensor: The output tensor.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 mid_channels,
+                 kernel_size=3,
+                 stride=1,
+                 se_cfg=None,
+                 with_expand_conv=True,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 with_cp=False,
+                 init_cfg=None):
+        super(InvertedResidual, self).__init__(init_cfg)
+        # The identity shortcut is only valid when input and output shapes
+        # match, i.e. no striding and no change in channel count.
+        self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
+        assert stride in [1, 2]
+        self.with_cp = with_cp
+        self.with_se = se_cfg is not None
+        self.with_expand_conv = with_expand_conv
+
+        if self.with_se:
+            assert isinstance(se_cfg, dict)
+        if not self.with_expand_conv:
+            # Without the expand conv the depthwise conv consumes the input
+            # directly, so the channel counts have to agree.
+            assert mid_channels == in_channels
+
+        if self.with_expand_conv:
+            # 1x1 conv: in_channels -> mid_channels.
+            self.expand_conv = ConvModule(
+                in_channels=in_channels,
+                out_channels=mid_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+        # groups == channels makes this a depthwise convolution; it is the
+        # only layer that applies ``stride``.
+        self.depthwise_conv = ConvModule(
+            in_channels=mid_channels,
+            out_channels=mid_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=kernel_size // 2,
+            groups=mid_channels,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        if self.with_se:
+            self.se = SELayer(**se_cfg)
+        # 1x1 conv: mid_channels -> out_channels.
+        # NOTE(review): this "linear" projection is built with ``act_cfg``,
+        # so it is followed by the configured activation; confirm this is
+        # intended rather than ``act_cfg=None``.
+        self.linear_conv = ConvModule(
+            in_channels=mid_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+    def forward(self, x):
+        """Run the block on ``x``, optionally through gradient checkpointing.
+
+        Args:
+            x (Tensor): The input tensor.
+
+        Returns:
+            Tensor: The block output, with ``x`` added when the residual
+            shortcut is enabled.
+        """
+
+        def _inner_forward(x):
+            out = x
+
+            if self.with_expand_conv:
+                out = self.expand_conv(out)
+
+            out = self.depthwise_conv(out)
+
+            if self.with_se:
+                out = self.se(out)
+
+            out = self.linear_conv(out)
+
+            if self.with_res_shortcut:
+                return x + out
+            else:
+                return out
+
+        # Checkpointing is only used when the input requires gradients.
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(_inner_forward, x)
+        else:
+            out = _inner_forward(x)
+
+        return out
--- a/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/make_divisible.py
+++ b/openmmlab_test/mmclassification-speed-benchmark/mmcls/models/utils/make_divisible.py
+def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
+    """Snap a channel number to a multiple of ``divisor``.
+
+    ``value`` is rounded to the nearest multiple of ``divisor`` (ties go
+    up) and clamped from below by ``min_value``. If the result falls below
+    ``min_ratio * value``, one more ``divisor`` is added.
+
+    Args:
+        value (int): The original channel number.
+        divisor (int): The divisor to fully divide the channel number.
+        min_value (int, optional): The minimum value of the output channel.
+            Default: None, means that the minimum value equal to the divisor.
+        min_ratio (float): The minimum ratio of the rounded channel
+            number to the original channel number. Default: 0.9.
+    Returns:
+        int: The modified output channel number
+    """
+
+    lower_bound = divisor if min_value is None else min_value
+    nearest_multiple = int(value + divisor / 2) // divisor * divisor
+    result = max(lower_bound, nearest_multiple)
+    # Rounding must not shrink the value by more than (1 - min_ratio).
+    if result < min_ratio * value:
+        result += divisor
+    return result