Commit 0fd8347d authored by unknown

Add the mmclassification-0.24.1 code and remove mmclassification-speed-benchmark

parent cc567e9e
# Copyright (c) OpenMMLab. All rights reserved.
from ..builder import CLASSIFIERS, build_backbone, build_head, build_neck
from ..heads import MultiLabelClsHead
from ..utils.augment import Augments
from .base import BaseClassifier
@CLASSIFIERS.register_module()
class ImageClassifier(BaseClassifier):
def __init__(self,
backbone,
neck=None,
head=None,
pretrained=None,
train_cfg=None,
init_cfg=None):
super(ImageClassifier, self).__init__(init_cfg)
if pretrained is not None:
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
self.backbone = build_backbone(backbone)
if neck is not None:
self.neck = build_neck(neck)
if head is not None:
self.head = build_head(head)
self.augments = None
if train_cfg is not None:
augments_cfg = train_cfg.get('augments', None)
if augments_cfg is not None:
self.augments = Augments(augments_cfg)
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmclassification/tools/analysis_tools/get_flops.py`
"""
return self.extract_feat(img, stage='pre_logits')
def extract_feat(self, img, stage='neck'):
"""Directly extract features from the specified stage.
Args:
img (Tensor): The input images. The shape of it should be
``(num_samples, num_channels, *img_shape)``.
stage (str): Which stage to output the feature. Choose from
"backbone", "neck" and "pre_logits". Defaults to "neck".
Returns:
tuple | Tensor: The output of specified stage.
The output depends on detailed implementation. In general, the
output of backbone and neck is a tuple and the output of
pre_logits is a tensor.
Examples:
1. Backbone output
>>> import torch
>>> from mmcv import Config
>>> from mmcls.models import build_classifier
>>>
>>> cfg = Config.fromfile('configs/resnet/resnet18_8xb32_in1k.py').model
>>> cfg.backbone.out_indices = (0, 1, 2, 3) # Output multi-scale feature maps
>>> model = build_classifier(cfg)
>>> outs = model.extract_feat(torch.rand(1, 3, 224, 224), stage='backbone')
>>> for out in outs:
... print(out.shape)
torch.Size([1, 64, 56, 56])
torch.Size([1, 128, 28, 28])
torch.Size([1, 256, 14, 14])
torch.Size([1, 512, 7, 7])
2. Neck output
>>> import torch
>>> from mmcv import Config
>>> from mmcls.models import build_classifier
>>>
>>> cfg = Config.fromfile('configs/resnet/resnet18_8xb32_in1k.py').model
>>> cfg.backbone.out_indices = (0, 1, 2, 3) # Output multi-scale feature maps
>>> model = build_classifier(cfg)
>>>
>>> outs = model.extract_feat(torch.rand(1, 3, 224, 224), stage='neck')
>>> for out in outs:
... print(out.shape)
torch.Size([1, 64])
torch.Size([1, 128])
torch.Size([1, 256])
torch.Size([1, 512])
3. Pre-logits output (without the final linear classifier head)
>>> import torch
>>> from mmcv import Config
>>> from mmcls.models import build_classifier
>>>
>>> cfg = Config.fromfile('configs/vision_transformer/vit-base-p16_pt-64xb64_in1k-224.py').model
>>> model = build_classifier(cfg)
>>>
>>> out = model.extract_feat(torch.rand(1, 3, 224, 224), stage='pre_logits')
>>> print(out.shape)  # The hidden dim of the head is 3072
torch.Size([1, 3072])
""" # noqa: E501
assert stage in ['backbone', 'neck', 'pre_logits'], \
(f'Invalid output stage "{stage}", please choose from "backbone", '
'"neck" and "pre_logits"')
x = self.backbone(img)
if stage == 'backbone':
return x
if self.with_neck:
x = self.neck(x)
if stage == 'neck':
return x
if self.with_head and hasattr(self.head, 'pre_logits'):
x = self.head.pre_logits(x)
return x
def forward_train(self, img, gt_label, **kwargs):
"""Forward computation during training.
Args:
img (Tensor): of shape (N, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
gt_label (Tensor): It should be of shape (N, 1) encoding the
ground-truth label of input images for single label task. It
should be of shape (N, C) encoding the ground-truth label
of input images for multi-labels task.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
if self.augments is not None:
img, gt_label = self.augments(img, gt_label)
x = self.extract_feat(img)
losses = dict()
loss = self.head.forward_train(x, gt_label)
losses.update(loss)
return losses
def simple_test(self, img, img_metas=None, **kwargs):
"""Test without augmentation."""
x = self.extract_feat(img)
if isinstance(self.head, MultiLabelClsHead):
assert 'softmax' not in kwargs, (
'Please use `sigmoid` instead of `softmax` '
'in multi-label tasks.')
res = self.head.simple_test(x, **kwargs)
return res
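# Usage sketch (illustrative, assuming mmcls 0.24.x and mmcv are installed):
# build an ImageClassifier from a plain dict config and exercise both the
# training and inference entry points. The ResNet-18 settings mirror the
# standard ImageNet config referenced in the docstring above.
import torch
from mmcls.models import build_classifier

model_cfg = dict(
    type='ImageClassifier',
    backbone=dict(type='ResNet', depth=18, num_stages=4,
                  out_indices=(3, ), style='pytorch'),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(type='LinearClsHead', num_classes=1000, in_channels=512,
              loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
              topk=(1, 5)))
model = build_classifier(model_cfg)

imgs = torch.rand(2, 3, 224, 224)                 # random stand-in images
gt = torch.randint(0, 1000, (2, ))
losses = model.forward_train(imgs, gt)            # dict with a 'loss' entry
preds = model.simple_test(imgs)                   # list of per-sample scores
print(losses['loss'], len(preds))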
# Copyright (c) OpenMMLab. All rights reserved.
from .cls_head import ClsHead
from .conformer_head import ConformerHead
from .deit_head import DeiTClsHead
from .efficientformer_head import EfficientFormerClsHead
from .linear_head import LinearClsHead
from .multi_label_csra_head import CSRAClsHead
from .multi_label_head import MultiLabelClsHead
from .multi_label_linear_head import MultiLabelLinearClsHead
from .stacked_head import StackedLinearClsHead
from .vision_transformer_head import VisionTransformerClsHead
__all__ = [
'ClsHead', 'LinearClsHead', 'StackedLinearClsHead', 'MultiLabelClsHead',
'MultiLabelLinearClsHead', 'VisionTransformerClsHead', 'DeiTClsHead',
'ConformerHead', 'EfficientFormerClsHead', 'CSRAClsHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from mmcv.runner import BaseModule
......
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import torch
import torch.nn.functional as F
from mmcls.models.losses import Accuracy
from ..builder import HEADS, build_loss
from ..utils import is_tracing
from .base_head import BaseHead
@HEADS.register_module()
class ClsHead(BaseHead):
"""classification head.
Args:
loss (dict): Config of classification loss.
topk (int | tuple): Top-k accuracy.
cal_acc (bool): Whether to calculate accuracy during training.
If you use Mixup/CutMix or something like that during training,
it is not reasonable to calculate accuracy. Defaults to False.
"""
def __init__(self,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, ),
cal_acc=False,
init_cfg=None):
super(ClsHead, self).__init__(init_cfg=init_cfg)
assert isinstance(loss, dict)
assert isinstance(topk, (int, tuple))
if isinstance(topk, int):
topk = (topk, )
for _topk in topk:
assert _topk > 0, 'Top-k should be larger than 0'
self.topk = topk
self.compute_loss = build_loss(loss)
self.compute_accuracy = Accuracy(topk=self.topk)
self.cal_acc = cal_acc
def loss(self, cls_score, gt_label, **kwargs):
num_samples = len(cls_score)
losses = dict()
# compute loss
loss = self.compute_loss(
cls_score, gt_label, avg_factor=num_samples, **kwargs)
if self.cal_acc:
# compute accuracy
acc = self.compute_accuracy(cls_score, gt_label)
assert len(acc) == len(self.topk)
losses['accuracy'] = {
f'top-{k}': a
for k, a in zip(self.topk, acc)
}
losses['loss'] = loss
return losses
def forward_train(self, cls_score, gt_label, **kwargs):
if isinstance(cls_score, tuple):
cls_score = cls_score[-1]
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
warnings.warn(
'The input of ClsHead should be already logits. '
'Please modify the backbone if you want to get pre-logits feature.'
)
return x
def simple_test(self, cls_score, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
cls_score (tuple[Tensor]): The input classification score logits.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, num_classes)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
if isinstance(cls_score, tuple):
cls_score = cls_score[-1]
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def post_process(self, pred):
on_trace = is_tracing()
if torch.onnx.is_in_onnx_export() or on_trace:
return pred
pred = list(pred.detach().cpu().numpy())
return pred
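# Usage sketch (illustrative, assuming mmcls 0.24.x): ClsHead expects
# already-computed logits, so random (N, num_classes) scores and integer
# labels are enough to exercise the loss, accuracy and test paths.
import torch
from mmcls.models import build_head

head = build_head(dict(type='ClsHead', topk=(1, 5), cal_acc=True))
logits = torch.rand(4, 10)
labels = torch.randint(0, 10, (4, ))
losses = head.forward_train(logits, labels)   # {'loss': ..., 'accuracy': {...}}
scores = head.simple_test(logits)             # softmax scores as a list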
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn.utils.weight_init import trunc_normal_
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class ConformerHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use ``dict(type='Normal', layer='Linear', std=0.01)``.
"""
def __init__(
self,
num_classes,
in_channels, # [conv_dim, trans_dim]
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(ConformerHead, self).__init__(init_cfg=None, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.init_cfg = init_cfg
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.conv_cls_head = nn.Linear(self.in_channels[0], num_classes)
self.trans_cls_head = nn.Linear(self.in_channels[1], num_classes)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight, std=.02)
if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
def init_weights(self):
super(ConformerHead, self).init_weights()
if (isinstance(self.init_cfg, dict)
and self.init_cfg['type'] == 'Pretrained'):
# Suppress default init if use pretrained model.
return
else:
self.apply(self._init_weights)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes convolution features and transformer features. The
shape of them should be ``(num_samples, in_channels[0])`` and
``(num_samples, in_channels[1])``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
# There are two outputs in the Conformer model
assert len(x) == 2
conv_cls_score = self.conv_cls_head(x[0])
tran_cls_score = self.trans_cls_head(x[1])
if softmax:
cls_score = conv_cls_score + tran_cls_score
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
if post_process:
pred = self.post_process(pred)
else:
pred = [conv_cls_score, tran_cls_score]
if post_process:
pred = list(map(self.post_process, pred))
return pred
def forward_train(self, x, gt_label):
x = self.pre_logits(x)
assert isinstance(x, list) and len(x) == 2, \
'There should be two outputs in the Conformer model'
conv_cls_score = self.conv_cls_head(x[0])
tran_cls_score = self.trans_cls_head(x[1])
losses = self.loss([conv_cls_score, tran_cls_score], gt_label)
return losses
def loss(self, cls_score, gt_label):
num_samples = len(cls_score[0])
losses = dict()
# compute loss
loss = sum([
self.compute_loss(score, gt_label, avg_factor=num_samples) /
len(cls_score) for score in cls_score
])
if self.cal_acc:
# compute accuracy
acc = self.compute_accuracy(cls_score[0] + cls_score[1], gt_label)
assert len(acc) == len(self.topk)
losses['accuracy'] = {
f'top-{k}': a
for k, a in zip(self.topk, acc)
}
losses['loss'] = loss
return losses
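# Usage sketch (illustrative, assuming mmcls 0.24.x): the Conformer backbone
# yields a pair of (conv, transformer) features; random tensors with widths
# in_channels=[256, 384] stand in for them here.
import torch
from mmcls.models import build_head

head = build_head(dict(type='ConformerHead', num_classes=10,
                       in_channels=[256, 384]))
feats = [torch.rand(2, 256), torch.rand(2, 384)]
losses = head.forward_train(feats, torch.randint(0, 10, (2, )))
pred = head.simple_test(feats)   # softmax of the summed conv/trans scores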
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from mmcls.utils import get_root_logger
from ..builder import HEADS
from .vision_transformer_head import VisionTransformerClsHead
@HEADS.register_module()
class DeiTClsHead(VisionTransformerClsHead):
"""Distilled Vision Transformer classifier head.
Compared with :class:`VisionTransformerClsHead`, this head adds an
extra linear layer to handle the dist token. The final classification score
is the average of the linear transformation results of ``cls_token`` and
``dist_token``.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
hidden_dim (int): Number of the dimensions for hidden layer.
Defaults to None, which means no extra hidden layer.
act_cfg (dict): The activation config. Only available during
pre-training. Defaults to ``dict(type='Tanh')``.
init_cfg (dict): The extra initialization configs. Defaults to
``dict(type='Constant', layer='Linear', val=0)``.
"""
def __init__(self, *args, **kwargs):
super(DeiTClsHead, self).__init__(*args, **kwargs)
if self.hidden_dim is None:
head_dist = nn.Linear(self.in_channels, self.num_classes)
else:
head_dist = nn.Linear(self.hidden_dim, self.num_classes)
self.layers.add_module('head_dist', head_dist)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
_, cls_token, dist_token = x
if self.hidden_dim is None:
return cls_token, dist_token
else:
cls_token = self.layers.act(self.layers.pre_logits(cls_token))
dist_token = self.layers.act(self.layers.pre_logits(dist_token))
return cls_token, dist_token
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes patch token, cls token and dist token. The cls token
and dist token will be used to classify and the shape of them
should be ``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
cls_token, dist_token = self.pre_logits(x)
cls_score = (self.layers.head(cls_token) +
self.layers.head_dist(dist_token)) / 2
if softmax:
pred = F.softmax(
cls_score, dim=1) if cls_score is not None else None
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label):
logger = get_root_logger()
logger.warning("MMClassification doesn't support to train the "
'distilled version DeiT.')
cls_token, dist_token = self.pre_logits(x)
cls_score = (self.layers.head(cls_token) +
self.layers.head_dist(dist_token)) / 2
losses = self.loss(cls_score, gt_label)
return losses
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class EfficientFormerClsHead(ClsHead):
"""EfficientFormer classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
distillation (bool): Whether to use an additional distilled head.
Defaults to True.
init_cfg (dict): The extra initialization configs. Defaults to
``dict(type='Normal', layer='Linear', std=0.01)``.
"""
def __init__(self,
num_classes,
in_channels,
distillation=True,
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(EfficientFormerClsHead, self).__init__(
init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.dist = distillation
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.head = nn.Linear(self.in_channels, self.num_classes)
if self.dist:
self.dist_head = nn.Linear(self.in_channels, self.num_classes)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes patch token and cls token. The cls token will be used
to classify and the shape of it should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.head(x)
if self.dist:
cls_score = (cls_score + self.dist_head(x)) / 2
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
if self.dist:
raise NotImplementedError(
"MMClassification doesn't support to train"
' the distilled version EfficientFormer.')
else:
x = self.pre_logits(x)
cls_score = self.head(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class LinearClsHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self,
num_classes,
in_channels,
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(LinearClsHead, self).__init__(init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.fc = nn.Linear(self.in_channels, self.num_classes)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.fc(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
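# Usage sketch (illustrative, assuming mmcls 0.24.x): LinearClsHead maps a
# pooled feature of width in_channels to num_classes logits; the width 512
# matches the ResNet-18 example earlier in this commit.
import torch
from mmcls.models import build_head

head = build_head(dict(type='LinearClsHead', num_classes=1000, in_channels=512,
                       loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
                       topk=(1, 5)))
feat = torch.rand(2, 512)
losses = head.forward_train(feat, torch.randint(0, 1000, (2, )))
scores = head.simple_test(feat, softmax=True, post_process=False)  # (2, 1000)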
# Copyright (c) OpenMMLab. All rights reserved.
# Modified from https://github.com/Kevinz-code/CSRA
import torch
import torch.nn as nn
from mmcv.runner import BaseModule, ModuleList
from ..builder import HEADS
from .multi_label_head import MultiLabelClsHead
@HEADS.register_module()
class CSRAClsHead(MultiLabelClsHead):
"""Class-specific residual attention classifier head.
Residual Attention: A Simple but Effective Method for Multi-Label
Recognition (ICCV 2021)
Please refer to the `paper <https://arxiv.org/abs/2108.02456>`__ for
details.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
num_heads (int): Number of residual attention heads.
loss (dict): Config of classification loss.
lam (float): Lambda that combines global average and max pooling
scores.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
temperature_settings = { # softmax temperature settings
1: [1],
2: [1, 99],
4: [1, 2, 4, 99],
6: [1, 2, 3, 4, 5, 99],
8: [1, 2, 3, 4, 5, 6, 7, 99]
}
def __init__(self,
num_classes,
in_channels,
num_heads,
lam,
loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
assert num_heads in self.temperature_settings.keys(
), 'The num of heads is not in temperature setting.'
assert lam > 0, 'Lambda should be between 0 and 1.'
super(CSRAClsHead, self).__init__(
init_cfg=init_cfg, loss=loss, *args, **kwargs)
self.temp_list = self.temperature_settings[num_heads]
self.csra_heads = ModuleList([
CSRAModule(num_classes, in_channels, self.temp_list[i], lam)
for i in range(num_heads)
])
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, post_process=True, **kwargs):
logit = 0.
x = self.pre_logits(x)
for head in self.csra_heads:
logit += head(x)
if post_process:
return self.post_process(logit)
else:
return logit
def forward_train(self, x, gt_label, **kwargs):
logit = 0.
x = self.pre_logits(x)
for head in self.csra_heads:
logit += head(x)
gt_label = gt_label.type_as(logit)
_gt_label = torch.abs(gt_label)
losses = self.loss(logit, _gt_label, **kwargs)
return losses
class CSRAModule(BaseModule):
"""Basic module of CSRA with different temperature.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
T (int): Temperature setting.
lam (float): Lambda that combines global average and max pooling
scores.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self, num_classes, in_channels, T, lam, init_cfg=None):
super(CSRAModule, self).__init__(init_cfg=init_cfg)
self.T = T # temperature
self.lam = lam # Lambda
self.head = nn.Conv2d(in_channels, num_classes, 1, bias=False)
self.softmax = nn.Softmax(dim=2)
def forward(self, x):
score = self.head(x) / torch.norm(
self.head.weight, dim=1, keepdim=True).transpose(0, 1)
score = score.flatten(2)
base_logit = torch.mean(score, dim=2)
if self.T == 99: # max-pooling
att_logit = torch.max(score, dim=2)[0]
else:
score_soft = self.softmax(score * self.T)
att_logit = torch.sum(score * score_soft, dim=2)
return base_logit + self.lam * att_logit
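# Usage sketch (illustrative, assuming mmcls 0.24.x): CSRAClsHead consumes the
# un-pooled backbone feature map (N, C, H, W) and mixes a global-average logit
# with a temperature-scaled attention logit per class; ``lam`` weights the
# attention term and T=99 degenerates to max pooling.
import torch
from mmcls.models import build_head

head = build_head(dict(type='CSRAClsHead', num_classes=20, in_channels=2048,
                       num_heads=1, lam=0.1))
feat_map = torch.rand(2, 2048, 7, 7)
logits = head.simple_test(feat_map, post_process=False)   # (2, 20)
multi_hot = torch.randint(0, 2, (2, 20)).float()
losses = head.forward_train(feat_map, multi_hot)          # sigmoid BCE loss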
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from ..builder import HEADS, build_loss
from ..utils import is_tracing
from .base_head import BaseHead
@HEADS.register_module()
class MultiLabelClsHead(BaseHead):
"""Classification head for multilabel task.
Args:
loss (dict): Config of classification loss.
"""
def __init__(self,
loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
init_cfg=None):
super(MultiLabelClsHead, self).__init__(init_cfg=init_cfg)
assert isinstance(loss, dict)
self.compute_loss = build_loss(loss)
def loss(self, cls_score, gt_label):
gt_label = gt_label.type_as(cls_score)
num_samples = len(cls_score)
losses = dict()
# map difficult examples to positive ones
_gt_label = torch.abs(gt_label)
# compute loss
loss = self.compute_loss(cls_score, _gt_label, avg_factor=num_samples)
losses['loss'] = loss
return losses
def forward_train(self, cls_score, gt_label, **kwargs):
if isinstance(cls_score, tuple):
cls_score = cls_score[-1]
gt_label = gt_label.type_as(cls_score)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
from mmcls.utils import get_root_logger
logger = get_root_logger()
logger.warning(
'The input of MultiLabelClsHead should be already logits. '
'Please modify the backbone if you want to get pre-logits feature.'
)
return x
def simple_test(self, x, sigmoid=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input classification score logits.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, num_classes)``.
sigmoid (bool): Whether to sigmoid the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
if isinstance(x, tuple):
x = x[-1]
if sigmoid:
pred = torch.sigmoid(x) if x is not None else None
else:
pred = x
if post_process:
return self.post_process(pred)
else:
return pred
def post_process(self, pred):
on_trace = is_tracing()
if torch.onnx.is_in_onnx_export() or on_trace:
return pred
pred = list(pred.detach().cpu().numpy())
return pred
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from ..builder import HEADS
from .multi_label_head import MultiLabelClsHead
@HEADS.register_module()
class MultiLabelLinearClsHead(MultiLabelClsHead):
"""Linear classification head for multilabel task.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
loss (dict): Config of classification loss.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self,
num_classes,
in_channels,
loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
init_cfg=dict(type='Normal', layer='Linear', std=0.01)):
super(MultiLabelLinearClsHead, self).__init__(
loss=loss, init_cfg=init_cfg)
if num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.in_channels = in_channels
self.num_classes = num_classes
self.fc = nn.Linear(self.in_channels, self.num_classes)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
gt_label = gt_label.type_as(x)
cls_score = self.fc(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
def simple_test(self, x, sigmoid=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
sigmoid (bool): Whether to sigmoid the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if sigmoid:
pred = torch.sigmoid(cls_score) if cls_score is not None else None
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
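# Usage sketch (illustrative, assuming mmcls 0.24.x): multi-label heads score
# each class independently with a sigmoid, and the ground truth is a
# multi-hot (N, num_classes) tensor.
import torch
from mmcls.models import build_head

head = build_head(dict(type='MultiLabelLinearClsHead', num_classes=20,
                       in_channels=512))
feat = torch.rand(2, 512)
gt = torch.randint(0, 2, (2, 20))
losses = head.forward_train(feat, gt)
probs = head.simple_test(feat, sigmoid=True, post_process=False)  # (2, 20)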
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Sequence
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import build_activation_layer, build_norm_layer
from mmcv.runner import BaseModule, ModuleList
from ..builder import HEADS
from .cls_head import ClsHead
class LinearBlock(BaseModule):
def __init__(self,
in_channels,
out_channels,
dropout_rate=0.,
norm_cfg=None,
act_cfg=None,
init_cfg=None):
super().__init__(init_cfg=init_cfg)
self.fc = nn.Linear(in_channels, out_channels)
self.norm = None
self.act = None
self.dropout = None
if norm_cfg is not None:
self.norm = build_norm_layer(norm_cfg, out_channels)[1]
if act_cfg is not None:
self.act = build_activation_layer(act_cfg)
if dropout_rate > 0:
self.dropout = nn.Dropout(p=dropout_rate)
def forward(self, x):
x = self.fc(x)
if self.norm is not None:
x = self.norm(x)
if self.act is not None:
x = self.act(x)
if self.dropout is not None:
x = self.dropout(x)
return x
@HEADS.register_module()
class StackedLinearClsHead(ClsHead):
"""Classifier head with several hidden fc layer and a output fc layer.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
mid_channels (Sequence): Number of channels in the hidden fc layers.
dropout_rate (float): Dropout rate after each hidden fc layer,
except the last layer. Defaults to 0.
norm_cfg (dict, optional): Config dict of normalization layer after
each hidden fc layer, except the last layer. Defaults to None.
act_cfg (dict, optional): Config dict of activation function after each
hidden layer, except the last layer. Defaults to use "ReLU".
"""
def __init__(self,
num_classes: int,
in_channels: int,
mid_channels: Sequence,
dropout_rate: float = 0.,
norm_cfg: Dict = None,
act_cfg: Dict = dict(type='ReLU'),
**kwargs):
super(StackedLinearClsHead, self).__init__(**kwargs)
assert num_classes > 0, \
f'`num_classes` of StackedLinearClsHead must be a positive ' \
f'integer, got {num_classes} instead.'
self.num_classes = num_classes
self.in_channels = in_channels
assert isinstance(mid_channels, Sequence), \
f'`mid_channels` of StackedLinearClsHead should be a sequence, ' \
f'instead of {type(mid_channels)}'
self.mid_channels = mid_channels
self.dropout_rate = dropout_rate
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self._init_layers()
def _init_layers(self):
self.layers = ModuleList()
in_channels = self.in_channels
for hidden_channels in self.mid_channels:
self.layers.append(
LinearBlock(
in_channels,
hidden_channels,
dropout_rate=self.dropout_rate,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg))
in_channels = hidden_channels
self.layers.append(
LinearBlock(
self.mid_channels[-1],
self.num_classes,
dropout_rate=0.,
norm_cfg=None,
act_cfg=None))
def init_weights(self):
self.layers.init_weights()
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
for layer in self.layers[:-1]:
x = layer(x)
return x
@property
def fc(self):
return self.layers[-1]
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.fc(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
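# Usage sketch (illustrative, assuming mmcls 0.24.x): StackedLinearClsHead
# inserts hidden fc layers (here one 256-wide layer with BN and ReLU) before
# the final classification layer.
import torch
from mmcls.models import build_head

head = build_head(dict(type='StackedLinearClsHead', num_classes=10,
                       in_channels=512, mid_channels=[256],
                       dropout_rate=0.2, norm_cfg=dict(type='BN1d'),
                       act_cfg=dict(type='ReLU')))
feat = torch.rand(4, 512)
losses = head.forward_train(feat, torch.randint(0, 10, (4, )))
scores = head.simple_test(feat, post_process=False)   # (4, 10) after softmax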
# Copyright (c) OpenMMLab. All rights reserved.
import math
from collections import OrderedDict
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import build_activation_layer
from mmcv.cnn.utils.weight_init import trunc_normal_
from mmcv.runner import Sequential
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class VisionTransformerClsHead(ClsHead):
"""Vision Transformer classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
hidden_dim (int): Number of the dimensions for hidden layer.
Defaults to None, which means no extra hidden layer.
act_cfg (dict): The activation config. Only available during
pre-training. Defaults to ``dict(type='Tanh')``.
init_cfg (dict): The extra initialization configs. Defaults to
``dict(type='Constant', layer='Linear', val=0)``.
"""
def __init__(self,
num_classes,
in_channels,
hidden_dim=None,
act_cfg=dict(type='Tanh'),
init_cfg=dict(type='Constant', layer='Linear', val=0),
*args,
**kwargs):
super(VisionTransformerClsHead, self).__init__(
init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.hidden_dim = hidden_dim
self.act_cfg = act_cfg
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self._init_layers()
def _init_layers(self):
if self.hidden_dim is None:
layers = [('head', nn.Linear(self.in_channels, self.num_classes))]
else:
layers = [
('pre_logits', nn.Linear(self.in_channels, self.hidden_dim)),
('act', build_activation_layer(self.act_cfg)),
('head', nn.Linear(self.hidden_dim, self.num_classes)),
]
self.layers = Sequential(OrderedDict(layers))
def init_weights(self):
super(VisionTransformerClsHead, self).init_weights()
# Modified from ClassyVision
if hasattr(self.layers, 'pre_logits'):
# Lecun norm
trunc_normal_(
self.layers.pre_logits.weight,
std=math.sqrt(1 / self.layers.pre_logits.in_features))
nn.init.zeros_(self.layers.pre_logits.bias)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
_, cls_token = x
if self.hidden_dim is None:
return cls_token
else:
x = self.layers.pre_logits(cls_token)
return self.layers.act(x)
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes patch token and cls token. The cls token will be used
to classify and the shape of it should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.layers.head(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.layers.head(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
# Copyright (c) OpenMMLab. All rights reserved.
from .accuracy import Accuracy, accuracy
from .asymmetric_loss import AsymmetricLoss, asymmetric_loss
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
cross_entropy)
from .focal_loss import FocalLoss, sigmoid_focal_loss
from .label_smooth_loss import LabelSmoothLoss
from .seesaw_loss import SeesawLoss
from .utils import (convert_to_one_hot, reduce_loss, weight_reduce_loss,
weighted_loss)
__all__ = [
'accuracy', 'Accuracy', 'asymmetric_loss', 'AsymmetricLoss',
'cross_entropy', 'binary_cross_entropy', 'CrossEntropyLoss', 'reduce_loss',
'weight_reduce_loss', 'LabelSmoothLoss', 'weighted_loss', 'FocalLoss',
'sigmoid_focal_loss', 'convert_to_one_hot', 'SeesawLoss'
]
# Copyright (c) OpenMMLab. All rights reserved.
from numbers import Number
import numpy as np
import torch
import torch.nn as nn
def accuracy_numpy(pred, target, topk=(1, ), thrs=0.):
if isinstance(thrs, Number):
thrs = (thrs, )
res_single = True
elif isinstance(thrs, tuple):
res_single = False
else:
raise TypeError(
f'thrs should be a number or tuple, but got {type(thrs)}.')
res = []
maxk = max(topk)
num = pred.shape[0]
static_inds = np.indices((num, maxk))[0]
pred_label = pred.argpartition(-maxk, axis=1)[:, -maxk:]
pred_score = pred[static_inds, pred_label]
sort_inds = np.argsort(pred_score, axis=1)[:, ::-1]
pred_label = pred_label[static_inds, sort_inds]
pred_score = pred_score[static_inds, sort_inds]
for k in topk:
correct_k = pred_label[:, :k] == target.reshape(-1, 1)
res_thr = []
for thr in thrs:
# Only prediction values larger than thr are counted as correct
_correct_k = correct_k & (pred_score[:, :k] > thr)
_correct_k = np.logical_or.reduce(_correct_k, axis=1)
res_thr.append((_correct_k.sum() * 100. / num))
if res_single:
res.append(res_thr[0])
else:
res.append(res_thr)
return res
def accuracy_torch(pred, target, topk=(1, ), thrs=0.):
if isinstance(thrs, Number):
thrs = (thrs, )
res_single = True
elif isinstance(thrs, tuple):
res_single = False
else:
raise TypeError(
f'thrs should be a number or tuple, but got {type(thrs)}.')
res = []
maxk = max(topk)
num = pred.size(0)
pred = pred.float()
pred_score, pred_label = pred.topk(maxk, dim=1)
pred_label = pred_label.t()
correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
for k in topk:
res_thr = []
for thr in thrs:
# Only prediction values larger than thr are counted as correct
_correct = correct & (pred_score.t() > thr)
correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
res_thr.append((correct_k.mul_(100. / num)))
if res_single:
res.append(res_thr[0])
else:
res.append(res_thr)
return res
def accuracy(pred, target, topk=1, thrs=0.):
"""Calculate accuracy according to the prediction and target.
Args:
pred (torch.Tensor | np.array): The model prediction.
target (torch.Tensor | np.array): The target of each prediction
topk (int | tuple[int]): If the predictions in ``topk``
matches the target, the predictions will be regarded as
correct ones. Defaults to 1.
thrs (Number | tuple[Number], optional): Predictions with scores under
the thresholds are considered negative. Default to 0.
Returns:
torch.Tensor | list[torch.Tensor] | list[list[torch.Tensor]]: Accuracy
- torch.Tensor: If both ``topk`` and ``thrs`` is a single value.
- list[torch.Tensor]: If one of ``topk`` or ``thrs`` is a tuple.
- list[list[torch.Tensor]]: If both ``topk`` and ``thrs`` is a \
tuple. And the first dim is ``topk``, the second dim is ``thrs``.
"""
assert isinstance(topk, (int, tuple))
if isinstance(topk, int):
topk = (topk, )
return_single = True
else:
return_single = False
assert isinstance(pred, (torch.Tensor, np.ndarray)), \
f'The pred should be torch.Tensor or np.ndarray ' \
f'instead of {type(pred)}.'
assert isinstance(target, (torch.Tensor, np.ndarray)), \
f'The target should be torch.Tensor or np.ndarray ' \
f'instead of {type(target)}.'
# torch version is faster in most situations.
to_tensor = (lambda x: torch.from_numpy(x)
if isinstance(x, np.ndarray) else x)
pred = to_tensor(pred)
target = to_tensor(target)
res = accuracy_torch(pred, target, topk, thrs)
return res[0] if return_single else res
class Accuracy(nn.Module):
def __init__(self, topk=(1, )):
"""Module to calculate the accuracy.
Args:
topk (tuple): The criterion used to calculate the
accuracy. Defaults to (1,).
"""
super().__init__()
self.topk = topk
def forward(self, pred, target):
"""Forward function to calculate accuracy.
Args:
pred (torch.Tensor): Prediction of models.
target (torch.Tensor): Target for each prediction.
Returns:
list[torch.Tensor]: The accuracies under different topk criteria.
"""
return accuracy(pred, target, self.topk)
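# Quick check of the helpers above (illustrative, assuming mmcls 0.24.x):
# four samples over three classes, all predicted correctly at top-1.
import torch
from mmcls.models.losses import Accuracy, accuracy

pred = torch.tensor([[0.8, 0.1, 0.1],
                     [0.1, 0.8, 0.1],
                     [0.1, 0.1, 0.8],
                     [0.7, 0.2, 0.1]])
target = torch.tensor([0, 1, 2, 0])
print(accuracy(pred, target, topk=(1, 2)))   # [tensor([100.]), tensor([100.])]
print(Accuracy(topk=1)(pred, target))        # tensor([100.])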
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from ..builder import LOSSES
from .utils import convert_to_one_hot, weight_reduce_loss
def asymmetric_loss(pred,
target,
weight=None,
gamma_pos=1.0,
gamma_neg=4.0,
clip=0.05,
reduction='mean',
avg_factor=None,
use_sigmoid=True,
eps=1e-8):
r"""asymmetric loss.
Please refer to the `paper <https://arxiv.org/abs/2009.14119>`__ for
details.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction with
shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, ). Defaults to None.
gamma_pos (float): Positive focusing parameter. Defaults to 1.0.
gamma_neg (float): Negative focusing parameter. We usually set
gamma_neg > gamma_pos. Defaults to 4.0.
clip (float, optional): Probability margin. Defaults to 0.05.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". If reduction is 'none' , loss
is same shape as pred and label. Defaults to 'mean'.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
use_sigmoid (bool): Whether the prediction uses sigmoid instead
of softmax. Defaults to True.
eps (float): The minimum value of the argument of logarithm. Defaults
to 1e-8.
Returns:
torch.Tensor: Loss.
"""
assert pred.shape == \
target.shape, 'pred and target should be in the same shape.'
if use_sigmoid:
pred_sigmoid = pred.sigmoid()
else:
pred_sigmoid = nn.functional.softmax(pred, dim=-1)
target = target.type_as(pred)
if clip and clip > 0:
pt = (1 - pred_sigmoid +
clip).clamp(max=1) * (1 - target) + pred_sigmoid * target
else:
pt = (1 - pred_sigmoid) * (1 - target) + pred_sigmoid * target
asymmetric_weight = (1 - pt).pow(gamma_pos * target + gamma_neg *
(1 - target))
loss = -torch.log(pt.clamp(min=eps)) * asymmetric_weight
if weight is not None:
assert weight.dim() == 1
weight = weight.float()
if pred.dim() > 1:
weight = weight.reshape(-1, 1)
loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
return loss
@LOSSES.register_module()
class AsymmetricLoss(nn.Module):
"""asymmetric loss.
Args:
gamma_pos (float): positive focusing parameter.
Defaults to 0.0.
gamma_neg (float): Negative focusing parameter. We
usually set gamma_neg > gamma_pos. Defaults to 4.0.
clip (float, optional): Probability margin. Defaults to 0.05.
reduction (str): The method used to reduce the loss into
a scalar.
loss_weight (float): Weight of loss. Defaults to 1.0.
use_sigmoid (bool): Whether the prediction uses sigmoid instead
of softmax. Defaults to True.
eps (float): The minimum value of the argument of logarithm. Defaults
to 1e-8.
"""
def __init__(self,
gamma_pos=0.0,
gamma_neg=4.0,
clip=0.05,
reduction='mean',
loss_weight=1.0,
use_sigmoid=True,
eps=1e-8):
super(AsymmetricLoss, self).__init__()
self.gamma_pos = gamma_pos
self.gamma_neg = gamma_neg
self.clip = clip
self.reduction = reduction
self.loss_weight = loss_weight
self.use_sigmoid = use_sigmoid
self.eps = eps
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
r"""asymmetric loss.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction
with shape (N, \*), N or (N,1).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
Returns:
torch.Tensor: Loss.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if target.dim() == 1 or (target.dim() == 2 and target.shape[1] == 1):
target = convert_to_one_hot(target.view(-1, 1), pred.shape[-1])
loss_cls = self.loss_weight * asymmetric_loss(
pred,
target,
weight,
gamma_pos=self.gamma_pos,
gamma_neg=self.gamma_neg,
clip=self.clip,
reduction=reduction,
avg_factor=avg_factor,
use_sigmoid=self.use_sigmoid,
eps=self.eps)
return loss_cls
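# Usage sketch (illustrative, assuming mmcls 0.24.x): AsymmetricLoss is meant
# for multi-label targets, so pred (logits) and target (multi-hot) are both
# (N, C); hard integer labels are also accepted and converted to one-hot.
import torch
from mmcls.models import build_loss

criterion = build_loss(dict(type='AsymmetricLoss', gamma_pos=0.0,
                            gamma_neg=4.0, clip=0.05))
pred = torch.randn(4, 5)
target = torch.randint(0, 2, (4, 5)).float()
print(criterion(pred, target))                       # scalar mean loss
print(criterion(pred, torch.tensor([0, 1, 2, 3])))   # hard labels -> one-hot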
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def cross_entropy(pred,
label,
weight=None,
reduction='mean',
avg_factor=None,
class_weight=None):
"""Calculate the CrossEntropy loss.
Args:
pred (torch.Tensor): The prediction with shape (N, C), C is the number
of classes.
label (torch.Tensor): The gt label of the prediction.
weight (torch.Tensor, optional): Sample-wise loss weight.
reduction (str): The method used to reduce the loss.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
class_weight (torch.Tensor, optional): The weight for each class with
shape (C), C is the number of classes. Default None.
Returns:
torch.Tensor: The calculated loss
"""
# element-wise losses
loss = F.cross_entropy(pred, label, weight=class_weight, reduction='none')
# apply weights and do the reduction
if weight is not None:
weight = weight.float()
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
def soft_cross_entropy(pred,
label,
weight=None,
reduction='mean',
class_weight=None,
avg_factor=None):
"""Calculate the Soft CrossEntropy loss. The label can be float.
Args:
pred (torch.Tensor): The prediction with shape (N, C), C is the number
of classes.
label (torch.Tensor): The gt label of the prediction with shape (N, C).
When using "mixup", the label can be float.
weight (torch.Tensor, optional): Sample-wise loss weight.
reduction (str): The method used to reduce the loss.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
class_weight (torch.Tensor, optional): The weight for each class with
shape (C), C is the number of classes. Default None.
Returns:
torch.Tensor: The calculated loss
"""
# element-wise losses
loss = -label * F.log_softmax(pred, dim=-1)
if class_weight is not None:
loss *= class_weight
loss = loss.sum(dim=-1)
# apply weights and do the reduction
if weight is not None:
weight = weight.float()
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
def binary_cross_entropy(pred,
label,
weight=None,
reduction='mean',
avg_factor=None,
class_weight=None,
pos_weight=None):
r"""Calculate the binary CrossEntropy loss with logits.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
label (torch.Tensor): The gt label with shape (N, \*).
weight (torch.Tensor, optional): Element-wise weight of loss with shape
(N, ). Defaults to None.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". If reduction is 'none' , loss
is same shape as pred and label. Defaults to 'mean'.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
class_weight (torch.Tensor, optional): The weight for each class with
shape (C), C is the number of classes. Default None.
pos_weight (torch.Tensor, optional): The positive weight for each
class with shape (C), C is the number of classes. Default None.
Returns:
torch.Tensor: The calculated loss
"""
# Ensure that the size of class_weight is consistent with pred and label to
# avoid automatic broadcasting.
assert pred.dim() == label.dim()
if class_weight is not None:
N = pred.size()[0]
class_weight = class_weight.repeat(N, 1)
loss = F.binary_cross_entropy_with_logits(
pred,
label,
weight=class_weight,
pos_weight=pos_weight,
reduction='none')
# apply weights and do the reduction
if weight is not None:
assert weight.dim() == 1
weight = weight.float()
if pred.dim() > 1:
weight = weight.reshape(-1, 1)
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
@LOSSES.register_module()
class CrossEntropyLoss(nn.Module):
"""Cross entropy loss.
Args:
use_sigmoid (bool): Whether the prediction uses sigmoid
instead of softmax. Defaults to False.
use_soft (bool): Whether to use the soft version of CrossEntropyLoss.
Defaults to False.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". Defaults to 'mean'.
loss_weight (float): Weight of the loss. Defaults to 1.0.
class_weight (List[float], optional): The weight for each class with
shape (C), C is the number of classes. Default None.
pos_weight (List[float], optional): The positive weight for each
class with shape (C), C is the number of classes. Only enabled in
BCE loss when ``use_sigmoid`` is True. Default None.
"""
def __init__(self,
use_sigmoid=False,
use_soft=False,
reduction='mean',
loss_weight=1.0,
class_weight=None,
pos_weight=None):
super(CrossEntropyLoss, self).__init__()
self.use_sigmoid = use_sigmoid
self.use_soft = use_soft
assert not (
self.use_soft and self.use_sigmoid
), 'use_sigmoid and use_soft could not be set simultaneously'
self.reduction = reduction
self.loss_weight = loss_weight
self.class_weight = class_weight
self.pos_weight = pos_weight
if self.use_sigmoid:
self.cls_criterion = binary_cross_entropy
elif self.use_soft:
self.cls_criterion = soft_cross_entropy
else:
self.cls_criterion = cross_entropy
def forward(self,
cls_score,
label,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if self.class_weight is not None:
class_weight = cls_score.new_tensor(self.class_weight)
else:
class_weight = None
# only BCE loss has pos_weight
if self.pos_weight is not None and self.use_sigmoid:
pos_weight = cls_score.new_tensor(self.pos_weight)
kwargs.update({'pos_weight': pos_weight})
else:
pos_weight = None
loss_cls = self.loss_weight * self.cls_criterion(
cls_score,
label,
weight,
class_weight=class_weight,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss_cls
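# Usage sketch (illustrative, assuming mmcls 0.24.x) of the three criterion
# variants selected by ``use_sigmoid`` / ``use_soft``: plain CE with hard
# labels, BCE-with-logits for multi-hot targets, and soft CE for float
# (e.g. mixup) targets.
import torch
import torch.nn.functional as F
from mmcls.models import build_loss

logits = torch.randn(4, 10)
hard = torch.randint(0, 10, (4, ))

ce = build_loss(dict(type='CrossEntropyLoss', loss_weight=1.0))
print(ce(logits, hard))                                # softmax cross entropy

bce = build_loss(dict(type='CrossEntropyLoss', use_sigmoid=True))
print(bce(logits, F.one_hot(hard, 10).float()))        # sigmoid BCE

soft = build_loss(dict(type='CrossEntropyLoss', use_soft=True))
soft_label = 0.7 * F.one_hot(hard, 10).float() + 0.03  # mixup-like target
print(soft(logits, soft_label))                        # soft cross entropy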
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
from .utils import convert_to_one_hot, weight_reduce_loss
def sigmoid_focal_loss(pred,
......@@ -12,14 +13,14 @@ def sigmoid_focal_loss(pred,
alpha=0.25,
reduction='mean',
avg_factor=None):
"""Sigmoid focal loss.
r"""Sigmoid focal loss.
Args:
pred (torch.Tensor): The prediction with shape (N, *).
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction with
shape (N, *).
shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, ). Dafaults to None.
(N, ). Defaults to None.
gamma (float): The gamma for calculating the modulating factor.
Defaults to 2.0.
alpha (float): A balanced form for Focal Loss. Defaults to 0.25.
......@@ -82,16 +83,16 @@ class FocalLoss(nn.Module):
weight=None,
avg_factor=None,
reduction_override=None):
"""Sigmoid focal loss.
r"""Sigmoid focal loss.
Args:
pred (torch.Tensor): The prediction with shape (N, *).
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction
with shape (N, *).
with shape (N, \*), N or (N,1).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, *). Dafaults to None.
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
......@@ -102,6 +103,8 @@ class FocalLoss(nn.Module):
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if target.dim() == 1 or (target.dim() == 2 and target.shape[1] == 1):
target = convert_to_one_hot(target.view(-1, 1), pred.shape[-1])
loss_cls = self.loss_weight * sigmoid_focal_loss(
pred,
target,
......
import warnings
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
......@@ -10,9 +9,10 @@ from .utils import convert_to_one_hot
@LOSSES.register_module()
class LabelSmoothLoss(nn.Module):
r"""Intializer for the label smoothed cross entropy loss.
Refers to `Rethinking the Inception Architecture for Computer Vision` -
https://arxiv.org/abs/1512.00567
r"""Initializer for the label smoothed cross entropy loss.
Refers to `Rethinking the Inception Architecture for Computer Vision
<https://arxiv.org/abs/1512.00567>`_
This decreases gap between output scores and encourages generalization.
Labels provided to forward can be one-hot like vectors (NxC) or class
......@@ -24,7 +24,7 @@ class LabelSmoothLoss(nn.Module):
label_smooth_val (float): The degree of label smoothing.
num_classes (int, optional): Number of classes. Defaults to None.
mode (str): Refers to notes, Options are 'original', 'classy_vision',
'multi_label'. Defaults to 'classy_vision'
'multi_label'. Defaults to 'original'
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". Defaults to 'mean'.
loss_weight (float): Weight of the loss. Defaults to 1.0.
......@@ -34,7 +34,7 @@ class LabelSmoothLoss(nn.Module):
as the original paper as:
.. math::
(1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K}
(1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K}
where epsilon is the `label_smooth_val`, K is the num_classes and
delta(k,y) is Dirac delta, which equals 1 for k=y and 0 otherwise.
......@@ -43,19 +43,19 @@ class LabelSmoothLoss(nn.Module):
method as the facebookresearch/ClassyVision repo as:
.. math::
\frac{\delta_{k, y} + \epsilon/K}{1+\epsilon}
\frac{\delta_{k, y} + \epsilon/K}{1+\epsilon}
if the mode is "multi_label", this will accept labels from multi-label
task and smoothing them as:
.. math::
(1-2\epsilon)\delta_{k, y} + \epsilon
(1-2\epsilon)\delta_{k, y} + \epsilon
"""
def __init__(self,
label_smooth_val,
num_classes=None,
mode=None,
mode='original',
reduction='mean',
loss_weight=1.0):
super().__init__()
......@@ -74,14 +74,6 @@ class LabelSmoothLoss(nn.Module):
f'but gets {mode}.'
self.reduction = reduction
if mode is None:
warnings.warn(
'LabelSmoothLoss mode is not set, use "classy_vision" '
'by default. The default value will be changed to '
'"original" recently. Please set mode manually if want '
'to keep "classy_vision".', UserWarning)
mode = 'classy_vision'
accept_mode = {'original', 'classy_vision', 'multi_label'}
assert mode in accept_mode, \
f'LabelSmoothLoss supports mode {accept_mode}, but gets {mode}.'
......@@ -124,6 +116,23 @@ class LabelSmoothLoss(nn.Module):
avg_factor=None,
reduction_override=None,
**kwargs):
r"""Label smooth loss.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
label (torch.Tensor): The ground truth label of the prediction
with shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
Returns:
torch.Tensor: Loss.
"""
if self.num_classes is not None:
assert self.num_classes == cls_score.shape[1], \
f'num_classes should equal to cls_score.shape[1], ' \
......
# Copyright (c) OpenMMLab. All rights reserved.
# migrate from mmdetection with modifications
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def seesaw_ce_loss(cls_score,
labels,
weight,
cum_samples,
num_classes,
p,
q,
eps,
reduction='mean',
avg_factor=None):
"""Calculate the Seesaw CrossEntropy loss.
Args:
cls_score (torch.Tensor): The prediction with shape (N, C),
C is the number of classes.
labels (torch.Tensor): The learning label of the prediction.
weight (torch.Tensor): Sample-wise loss weight.
cum_samples (torch.Tensor): Cumulative samples for each category.
num_classes (int): The number of classes.
p (float): The ``p`` in the mitigation factor.
q (float): The ``q`` in the compensation factor.
eps (float): The minimal value of divisor to smooth
the computation of the compensation factor.
reduction (str, optional): The method used to reduce the loss.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
Returns:
torch.Tensor: The calculated loss
"""
assert cls_score.size(-1) == num_classes
assert len(cum_samples) == num_classes
onehot_labels = F.one_hot(labels, num_classes)
seesaw_weights = cls_score.new_ones(onehot_labels.size())
# mitigation factor
if p > 0:
sample_ratio_matrix = cum_samples[None, :].clamp(
min=1) / cum_samples[:, None].clamp(min=1)
index = (sample_ratio_matrix < 1.0).float()
sample_weights = sample_ratio_matrix.pow(p) * index + (1 - index
) # M_{ij}
mitigation_factor = sample_weights[labels.long(), :]
seesaw_weights = seesaw_weights * mitigation_factor
# compensation factor
if q > 0:
scores = F.softmax(cls_score.detach(), dim=1)
self_scores = scores[
torch.arange(0, len(scores)).to(scores.device).long(),
labels.long()]
score_matrix = scores / self_scores[:, None].clamp(min=eps)
index = (score_matrix > 1.0).float()
compensation_factor = score_matrix.pow(q) * index + (1 - index)
seesaw_weights = seesaw_weights * compensation_factor
cls_score = cls_score + (seesaw_weights.log() * (1 - onehot_labels))
loss = F.cross_entropy(cls_score, labels, weight=None, reduction='none')
if weight is not None:
weight = weight.float()
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
@LOSSES.register_module()
class SeesawLoss(nn.Module):
"""Implementation of seesaw loss.
Refers to `Seesaw Loss for Long-Tailed Instance Segmentation (CVPR 2021)
<https://arxiv.org/abs/2008.10032>`_
Args:
use_sigmoid (bool): Whether the prediction uses sigmoid instead of
softmax. Only False is supported. Defaults to False.
p (float): The ``p`` in the mitigation factor.
Defaults to 0.8.
q (float): The ``q`` in the compensation factor.
Defaults to 2.0.
num_classes (int): The number of classes.
Default to 1000 for the ImageNet dataset.
eps (float): The minimal value of divisor to smooth
the computation of compensation factor, default to 1e-2.
reduction (str): The method that reduces the loss to a scalar.
Options are "none", "mean" and "sum". Default to "mean".
loss_weight (float): The weight of the loss. Defaults to 1.0
"""
def __init__(self,
use_sigmoid=False,
p=0.8,
q=2.0,
num_classes=1000,
eps=1e-2,
reduction='mean',
loss_weight=1.0):
super(SeesawLoss, self).__init__()
assert not use_sigmoid, '`use_sigmoid` is not supported'
self.use_sigmoid = False
self.p = p
self.q = q
self.num_classes = num_classes
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
self.cls_criterion = seesaw_ce_loss
# cumulative samples for each category
self.register_buffer('cum_samples',
torch.zeros(self.num_classes, dtype=torch.float))
def forward(self,
cls_score,
labels,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
cls_score (torch.Tensor): The prediction with shape (N, C).
labels (torch.Tensor): The learning label of the prediction.
weight (torch.Tensor, optional): Sample-wise loss weight.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss. Options are "none", "mean" and "sum". Defaults to None.
Returns:
torch.Tensor: The calculated loss
"""
assert reduction_override in (None, 'none', 'mean', 'sum'), \
f'The `reduction_override` should be one of (None, "none", ' \
f'"mean", "sum"), but get "{reduction_override}".'
assert cls_score.size(0) == labels.view(-1).size(0), \
f'Expected `labels` shape [{cls_score.size(0)}], ' \
f'but got {list(labels.size())}'
reduction = (
reduction_override if reduction_override else self.reduction)
assert cls_score.size(-1) == self.num_classes, \
f'The channel number of output ({cls_score.size(-1)}) does ' \
f'not match the `num_classes` of seesaw loss ({self.num_classes}).'
# accumulate the samples for each category
unique_labels = labels.unique()
for u_l in unique_labels:
inds_ = labels == u_l.item()
self.cum_samples[u_l] += inds_.sum()
if weight is not None:
weight = weight.float()
else:
weight = labels.new_ones(labels.size(), dtype=torch.float)
# calculate loss_cls_classes
loss_cls = self.loss_weight * self.cls_criterion(
cls_score, labels, weight, self.cum_samples, self.num_classes,
self.p, self.q, self.eps, reduction, avg_factor)
return loss_cls
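# Usage sketch (illustrative, assuming mmcls 0.24.x): SeesawLoss keeps a
# running per-class sample count in the ``cum_samples`` buffer and uses it to
# down-weight negative gradients for rare classes; ``num_classes`` must match
# the logit width.
import torch
from mmcls.models import build_loss

criterion = build_loss(dict(type='SeesawLoss', p=0.8, q=2.0, num_classes=5))
logits = torch.randn(8, 5)
labels = torch.randint(0, 5, (8, ))
print(criterion(logits, labels))   # scalar loss
print(criterion.cum_samples)       # per-class counts accumulated so far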