Commit 0fd8347d authored by unknown

Add the mmclassification-0.24.1 code and remove mmclassification-speed-benchmark

parent cc567e9e
# Copyright (c) OpenMMLab. All rights reserved.
from ..builder import CLASSIFIERS, build_backbone, build_head, build_neck
from ..heads import MultiLabelClsHead
from ..utils.augment import Augments
from .base import BaseClassifier
@CLASSIFIERS.register_module()
class ImageClassifier(BaseClassifier):
def __init__(self,
backbone,
neck=None,
head=None,
pretrained=None,
train_cfg=None,
init_cfg=None):
super(ImageClassifier, self).__init__(init_cfg)
if pretrained is not None:
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
self.backbone = build_backbone(backbone)
if neck is not None:
self.neck = build_neck(neck)
if head is not None:
self.head = build_head(head)
self.augments = None
if train_cfg is not None:
augments_cfg = train_cfg.get('augments', None)
if augments_cfg is not None:
self.augments = Augments(augments_cfg)
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmclassification/tools/analysis_tools/get_flops.py`
"""
return self.extract_feat(img, stage='pre_logits')
def extract_feat(self, img, stage='neck'):
"""Directly extract features from the specified stage.
Args:
img (Tensor): The input images. The shape of it should be
``(num_samples, num_channels, *img_shape)``.
stage (str): Which stage to output the feature. Choose from
"backbone", "neck" and "pre_logits". Defaults to "neck".
Returns:
tuple | Tensor: The output of specified stage.
The output depends on detailed implementation. In general, the
output of backbone and neck is a tuple and the output of
pre_logits is a tensor.
Examples:
1. Backbone output
>>> import torch
>>> from mmcv import Config
>>> from mmcls.models import build_classifier
>>>
>>> cfg = Config.fromfile('configs/resnet/resnet18_8xb32_in1k.py').model
>>> cfg.backbone.out_indices = (0, 1, 2, 3) # Output multi-scale feature maps
>>> model = build_classifier(cfg)
>>> outs = model.extract_feat(torch.rand(1, 3, 224, 224), stage='backbone')
>>> for out in outs:
... print(out.shape)
torch.Size([1, 64, 56, 56])
torch.Size([1, 128, 28, 28])
torch.Size([1, 256, 14, 14])
torch.Size([1, 512, 7, 7])
2. Neck output
>>> import torch
>>> from mmcv import Config
>>> from mmcls.models import build_classifier
>>>
>>> cfg = Config.fromfile('configs/resnet/resnet18_8xb32_in1k.py').model
>>> cfg.backbone.out_indices = (0, 1, 2, 3) # Output multi-scale feature maps
>>> model = build_classifier(cfg)
>>>
>>> outs = model.extract_feat(torch.rand(1, 3, 224, 224), stage='neck')
>>> for out in outs:
... print(out.shape)
torch.Size([1, 64])
torch.Size([1, 128])
torch.Size([1, 256])
torch.Size([1, 512])
3. Pre-logits output (without the final linear classifier head)
>>> import torch
>>> from mmcv import Config
>>> from mmcls.models import build_classifier
>>>
>>> cfg = Config.fromfile('configs/vision_transformer/vit-base-p16_pt-64xb64_in1k-224.py').model
>>> model = build_classifier(cfg)
>>>
>>> out = model.extract_feat(torch.rand(1, 3, 224, 224), stage='pre_logits')
>>> print(out.shape)  # The hidden dim of the head is 3072
torch.Size([1, 3072])
""" # noqa: E501
assert stage in ['backbone', 'neck', 'pre_logits'], \
(f'Invalid output stage "{stage}", please choose from "backbone", '
'"neck" and "pre_logits"')
x = self.backbone(img)
if stage == 'backbone':
return x
if self.with_neck:
x = self.neck(x)
if stage == 'neck':
return x
if self.with_head and hasattr(self.head, 'pre_logits'):
x = self.head.pre_logits(x)
return x
def forward_train(self, img, gt_label, **kwargs):
"""Forward computation during training.
Args:
img (Tensor): of shape (N, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
gt_label (Tensor): It should be of shape (N, 1) encoding the
ground-truth label of input images for single label task. It
should be of shape (N, C) encoding the ground-truth label
of input images for multi-labels task.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
if self.augments is not None:
img, gt_label = self.augments(img, gt_label)
x = self.extract_feat(img)
losses = dict()
loss = self.head.forward_train(x, gt_label)
losses.update(loss)
return losses
def simple_test(self, img, img_metas=None, **kwargs):
"""Test without augmentation."""
x = self.extract_feat(img)
if isinstance(self.head, MultiLabelClsHead):
assert 'softmax' not in kwargs, (
'Please use `sigmoid` instead of `softmax` '
'in multi-label tasks.')
res = self.head.simple_test(x, **kwargs)
return res
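# Usage sketch (illustrative, assuming mmcls 0.24.x and mmcv are installed):
# build an ImageClassifier from a plain dict config and exercise both the
# training and inference entry points. The ResNet-18 settings mirror the
# standard ImageNet config referenced in the docstring above.
import torch
from mmcls.models import build_classifier

model_cfg = dict(
    type='ImageClassifier',
    backbone=dict(type='ResNet', depth=18, num_stages=4,
                  out_indices=(3, ), style='pytorch'),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(type='LinearClsHead', num_classes=1000, in_channels=512,
              loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
              topk=(1, 5)))
model = build_classifier(model_cfg)

imgs = torch.rand(2, 3, 224, 224)                 # random stand-in images
gt = torch.randint(0, 1000, (2, ))
losses = model.forward_train(imgs, gt)            # dict with a 'loss' entry
preds = model.simple_test(imgs)                   # list of per-sample scores
print(losses['loss'], len(preds))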
# Copyright (c) OpenMMLab. All rights reserved.
from .cls_head import ClsHead
from .conformer_head import ConformerHead
from .deit_head import DeiTClsHead
from .efficientformer_head import EfficientFormerClsHead
from .linear_head import LinearClsHead
from .multi_label_csra_head import CSRAClsHead
from .multi_label_head import MultiLabelClsHead
from .multi_label_linear_head import MultiLabelLinearClsHead
from .stacked_head import StackedLinearClsHead
from .vision_transformer_head import VisionTransformerClsHead
__all__ = [
'ClsHead', 'LinearClsHead', 'StackedLinearClsHead', 'MultiLabelClsHead',
'MultiLabelLinearClsHead', 'VisionTransformerClsHead', 'DeiTClsHead',
'ConformerHead', 'EfficientFormerClsHead', 'CSRAClsHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from mmcv.runner import BaseModule
......
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import torch
import torch.nn.functional as F
from mmcls.models.losses import Accuracy
from ..builder import HEADS, build_loss
from ..utils import is_tracing
from .base_head import BaseHead
@HEADS.register_module()
class ClsHead(BaseHead):
"""classification head.
Args:
loss (dict): Config of classification loss.
topk (int | tuple): Top-k accuracy.
cal_acc (bool): Whether to calculate accuracy during training.
If you use Mixup/CutMix or something like that during training,
it is not reasonable to calculate accuracy. Defaults to False.
"""
def __init__(self,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, ),
cal_acc=False,
init_cfg=None):
super(ClsHead, self).__init__(init_cfg=init_cfg)
assert isinstance(loss, dict)
assert isinstance(topk, (int, tuple))
if isinstance(topk, int):
topk = (topk, )
for _topk in topk:
assert _topk > 0, 'Top-k should be larger than 0'
self.topk = topk
self.compute_loss = build_loss(loss)
self.compute_accuracy = Accuracy(topk=self.topk)
self.cal_acc = cal_acc
def loss(self, cls_score, gt_label, **kwargs):
num_samples = len(cls_score)
losses = dict()
# compute loss
loss = self.compute_loss(
cls_score, gt_label, avg_factor=num_samples, **kwargs)
if self.cal_acc:
# compute accuracy
acc = self.compute_accuracy(cls_score, gt_label)
assert len(acc) == len(self.topk)
losses['accuracy'] = {
f'top-{k}': a
for k, a in zip(self.topk, acc)
}
losses['loss'] = loss
return losses
def forward_train(self, cls_score, gt_label, **kwargs):
if isinstance(cls_score, tuple):
cls_score = cls_score[-1]
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
warnings.warn(
'The input of ClsHead should be already logits. '
'Please modify the backbone if you want to get pre-logits feature.'
)
return x
def simple_test(self, cls_score, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
cls_score (tuple[Tensor]): The input classification score logits.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, num_classes)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
if isinstance(cls_score, tuple):
cls_score = cls_score[-1]
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def post_process(self, pred):
on_trace = is_tracing()
if torch.onnx.is_in_onnx_export() or on_trace:
return pred
pred = list(pred.detach().cpu().numpy())
return pred
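# Usage sketch (illustrative, assuming mmcls 0.24.x): ClsHead expects
# already-computed logits, so random (N, num_classes) scores and integer
# labels are enough to exercise the loss, accuracy and test paths.
import torch
from mmcls.models import build_head

head = build_head(dict(type='ClsHead', topk=(1, 5), cal_acc=True))
logits = torch.rand(4, 10)
labels = torch.randint(0, 10, (4, ))
losses = head.forward_train(logits, labels)   # {'loss': ..., 'accuracy': {...}}
scores = head.simple_test(logits)             # softmax scores as a list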
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn.utils.weight_init import trunc_normal_
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class ConformerHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use ``dict(type='Normal', layer='Linear', std=0.01)``.
"""
def __init__(
self,
num_classes,
in_channels, # [conv_dim, trans_dim]
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(ConformerHead, self).__init__(init_cfg=None, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.init_cfg = init_cfg
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.conv_cls_head = nn.Linear(self.in_channels[0], num_classes)
self.trans_cls_head = nn.Linear(self.in_channels[1], num_classes)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight, std=.02)
if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
def init_weights(self):
super(ConformerHead, self).init_weights()
if (isinstance(self.init_cfg, dict)
and self.init_cfg['type'] == 'Pretrained'):
# Suppress default init if use pretrained model.
return
else:
self.apply(self._init_weights)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes convolution features and transformer features. The
shape of them should be ``(num_samples, in_channels[0])`` and
``(num_samples, in_channels[1])``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
# There are two outputs in the Conformer model
assert len(x) == 2
conv_cls_score = self.conv_cls_head(x[0])
tran_cls_score = self.trans_cls_head(x[1])
if softmax:
cls_score = conv_cls_score + tran_cls_score
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
if post_process:
pred = self.post_process(pred)
else:
pred = [conv_cls_score, tran_cls_score]
if post_process:
pred = list(map(self.post_process, pred))
return pred
def forward_train(self, x, gt_label):
x = self.pre_logits(x)
assert isinstance(x, list) and len(x) == 2, \
'There should be two outputs in the Conformer model'
conv_cls_score = self.conv_cls_head(x[0])
tran_cls_score = self.trans_cls_head(x[1])
losses = self.loss([conv_cls_score, tran_cls_score], gt_label)
return losses
def loss(self, cls_score, gt_label):
num_samples = len(cls_score[0])
losses = dict()
# compute loss
loss = sum([
self.compute_loss(score, gt_label, avg_factor=num_samples) /
len(cls_score) for score in cls_score
])
if self.cal_acc:
# compute accuracy
acc = self.compute_accuracy(cls_score[0] + cls_score[1], gt_label)
assert len(acc) == len(self.topk)
losses['accuracy'] = {
f'top-{k}': a
for k, a in zip(self.topk, acc)
}
losses['loss'] = loss
return losses
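# Usage sketch (illustrative, assuming mmcls 0.24.x): the Conformer backbone
# yields a pair of (conv, transformer) features; random tensors with widths
# in_channels=[256, 384] stand in for them here.
import torch
from mmcls.models import build_head

head = build_head(dict(type='ConformerHead', num_classes=10,
                       in_channels=[256, 384]))
feats = [torch.rand(2, 256), torch.rand(2, 384)]
losses = head.forward_train(feats, torch.randint(0, 10, (2, )))
pred = head.simple_test(feats)   # softmax of the summed conv/trans scores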
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from mmcls.utils import get_root_logger
from ..builder import HEADS
from .vision_transformer_head import VisionTransformerClsHead
@HEADS.register_module()
class DeiTClsHead(VisionTransformerClsHead):
"""Distilled Vision Transformer classifier head.
Compared with :class:`VisionTransformerClsHead`, this head adds an
extra linear layer to handle the dist token. The final classification score
is the average of the linear transformation results of ``cls_token`` and
``dist_token``.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
hidden_dim (int): Number of the dimensions for hidden layer.
Defaults to None, which means no extra hidden layer.
act_cfg (dict): The activation config. Only available during
pre-training. Defaults to ``dict(type='Tanh')``.
init_cfg (dict): The extra initialization configs. Defaults to
``dict(type='Constant', layer='Linear', val=0)``.
"""
def __init__(self, *args, **kwargs):
super(DeiTClsHead, self).__init__(*args, **kwargs)
if self.hidden_dim is None:
head_dist = nn.Linear(self.in_channels, self.num_classes)
else:
head_dist = nn.Linear(self.hidden_dim, self.num_classes)
self.layers.add_module('head_dist', head_dist)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
_, cls_token, dist_token = x
if self.hidden_dim is None:
return cls_token, dist_token
else:
cls_token = self.layers.act(self.layers.pre_logits(cls_token))
dist_token = self.layers.act(self.layers.pre_logits(dist_token))
return cls_token, dist_token
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes patch token, cls token and dist token. The cls token
and dist token will be used to classify and the shape of them
should be ``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
cls_token, dist_token = self.pre_logits(x)
cls_score = (self.layers.head(cls_token) +
self.layers.head_dist(dist_token)) / 2
if softmax:
pred = F.softmax(
cls_score, dim=1) if cls_score is not None else None
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label):
logger = get_root_logger()
logger.warning("MMClassification doesn't support to train the "
'distilled version DeiT.')
cls_token, dist_token = self.pre_logits(x)
cls_score = (self.layers.head(cls_token) +
self.layers.head_dist(dist_token)) / 2
losses = self.loss(cls_score, gt_label)
return losses
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class EfficientFormerClsHead(ClsHead):
"""EfficientFormer classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
distillation (bool): Whether to use an additional distilled head.
Defaults to True.
init_cfg (dict): The extra initialization configs. Defaults to
``dict(type='Normal', layer='Linear', std=0.01)``.
"""
def __init__(self,
num_classes,
in_channels,
distillation=True,
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(EfficientFormerClsHead, self).__init__(
init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.dist = distillation
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.head = nn.Linear(self.in_channels, self.num_classes)
if self.dist:
self.dist_head = nn.Linear(self.in_channels, self.num_classes)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes patch token and cls token. The cls token will be used
to classify and the shape of it should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.head(x)
if self.dist:
cls_score = (cls_score + self.dist_head(x)) / 2
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
if self.dist:
raise NotImplementedError(
"MMClassification doesn't support to train"
' the distilled version EfficientFormer.')
else:
x = self.pre_logits(x)
cls_score = self.head(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class LinearClsHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self,
num_classes,
in_channels,
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(LinearClsHead, self).__init__(init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.fc = nn.Linear(self.in_channels, self.num_classes)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.fc(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
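# Usage sketch (illustrative, assuming mmcls 0.24.x): LinearClsHead maps a
# pooled feature of width in_channels to num_classes logits; the width 512
# matches the ResNet-18 example earlier in this commit.
import torch
from mmcls.models import build_head

head = build_head(dict(type='LinearClsHead', num_classes=1000, in_channels=512,
                       loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
                       topk=(1, 5)))
feat = torch.rand(2, 512)
losses = head.forward_train(feat, torch.randint(0, 1000, (2, )))
scores = head.simple_test(feat, softmax=True, post_process=False)  # (2, 1000)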
# Copyright (c) OpenMMLab. All rights reserved.
# Modified from https://github.com/Kevinz-code/CSRA
import torch
import torch.nn as nn
from mmcv.runner import BaseModule, ModuleList
from ..builder import HEADS
from .multi_label_head import MultiLabelClsHead
@HEADS.register_module()
class CSRAClsHead(MultiLabelClsHead):
"""Class-specific residual attention classifier head.
Residual Attention: A Simple but Effective Method for Multi-Label
Recognition (ICCV 2021)
Please refer to the `paper <https://arxiv.org/abs/2108.02456>`__ for
details.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
num_heads (int): Number of residual attention heads.
loss (dict): Config of classification loss.
lam (float): Lambda that combines global average and max pooling
scores.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
temperature_settings = { # softmax temperature settings
1: [1],
2: [1, 99],
4: [1, 2, 4, 99],
6: [1, 2, 3, 4, 5, 99],
8: [1, 2, 3, 4, 5, 6, 7, 99]
}
def __init__(self,
num_classes,
in_channels,
num_heads,
lam,
loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
assert num_heads in self.temperature_settings.keys(
), 'The num of heads is not in temperature setting.'
assert lam > 0, 'Lambda should be between 0 and 1.'
super(CSRAClsHead, self).__init__(
init_cfg=init_cfg, loss=loss, *args, **kwargs)
self.temp_list = self.temperature_settings[num_heads]
self.csra_heads = ModuleList([
CSRAModule(num_classes, in_channels, self.temp_list[i], lam)
for i in range(num_heads)
])
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, post_process=True, **kwargs):
logit = 0.
x = self.pre_logits(x)
for head in self.csra_heads:
logit += head(x)
if post_process:
return self.post_process(logit)
else:
return logit
def forward_train(self, x, gt_label, **kwargs):
logit = 0.
x = self.pre_logits(x)
for head in self.csra_heads:
logit += head(x)
gt_label = gt_label.type_as(logit)
_gt_label = torch.abs(gt_label)
losses = self.loss(logit, _gt_label, **kwargs)
return losses
class CSRAModule(BaseModule):
"""Basic module of CSRA with different temperature.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
T (int): Temperature setting.
lam (float): Lambda that combines global average and max pooling
scores.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self, num_classes, in_channels, T, lam, init_cfg=None):
super(CSRAModule, self).__init__(init_cfg=init_cfg)
self.T = T # temperature
self.lam = lam # Lambda
self.head = nn.Conv2d(in_channels, num_classes, 1, bias=False)
self.softmax = nn.Softmax(dim=2)
def forward(self, x):
score = self.head(x) / torch.norm(
self.head.weight, dim=1, keepdim=True).transpose(0, 1)
score = score.flatten(2)
base_logit = torch.mean(score, dim=2)
if self.T == 99: # max-pooling
att_logit = torch.max(score, dim=2)[0]
else:
score_soft = self.softmax(score * self.T)
att_logit = torch.sum(score * score_soft, dim=2)
return base_logit + self.lam * att_logit
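# Usage sketch (illustrative, assuming mmcls 0.24.x): CSRAClsHead consumes the
# un-pooled backbone feature map (N, C, H, W) and mixes a global-average logit
# with a temperature-scaled attention logit per class; ``lam`` weights the
# attention term and T=99 degenerates to max pooling.
import torch
from mmcls.models import build_head

head = build_head(dict(type='CSRAClsHead', num_classes=20, in_channels=2048,
                       num_heads=1, lam=0.1))
feat_map = torch.rand(2, 2048, 7, 7)
logits = head.simple_test(feat_map, post_process=False)   # (2, 20)
multi_hot = torch.randint(0, 2, (2, 20)).float()
losses = head.forward_train(feat_map, multi_hot)          # sigmoid BCE loss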
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from ..builder import HEADS, build_loss
from ..utils import is_tracing
from .base_head import BaseHead
@HEADS.register_module()
class MultiLabelClsHead(BaseHead):
"""Classification head for multilabel task.
Args:
loss (dict): Config of classification loss.
"""
def __init__(self,
loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
init_cfg=None):
super(MultiLabelClsHead, self).__init__(init_cfg=init_cfg)
assert isinstance(loss, dict)
self.compute_loss = build_loss(loss)
def loss(self, cls_score, gt_label):
gt_label = gt_label.type_as(cls_score)
num_samples = len(cls_score)
losses = dict()
# map difficult examples to positive ones
_gt_label = torch.abs(gt_label)
# compute loss
loss = self.compute_loss(cls_score, _gt_label, avg_factor=num_samples)
losses['loss'] = loss
return losses
def forward_train(self, cls_score, gt_label, **kwargs):
if isinstance(cls_score, tuple):
cls_score = cls_score[-1]
gt_label = gt_label.type_as(cls_score)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
from mmcls.utils import get_root_logger
logger = get_root_logger()
logger.warning(
'The input of MultiLabelClsHead should be already logits. '
'Please modify the backbone if you want to get pre-logits feature.'
)
return x
def simple_test(self, x, sigmoid=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input classification score logits.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, num_classes)``.
sigmoid (bool): Whether to sigmoid the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
if isinstance(x, tuple):
x = x[-1]
if sigmoid:
pred = torch.sigmoid(x) if x is not None else None
else:
pred = x
if post_process:
return self.post_process(pred)
else:
return pred
def post_process(self, pred):
on_trace = is_tracing()
if torch.onnx.is_in_onnx_export() or on_trace:
return pred
pred = list(pred.detach().cpu().numpy())
return pred
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from ..builder import HEADS
from .multi_label_head import MultiLabelClsHead
@HEADS.register_module()
class MultiLabelLinearClsHead(MultiLabelClsHead):
"""Linear classification head for multilabel task.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
loss (dict): Config of classification loss.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self,
num_classes,
in_channels,
loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=1.0),
init_cfg=dict(type='Normal', layer='Linear', std=0.01)):
super(MultiLabelLinearClsHead, self).__init__(
loss=loss, init_cfg=init_cfg)
if num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.in_channels = in_channels
self.num_classes = num_classes
self.fc = nn.Linear(self.in_channels, self.num_classes)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
gt_label = gt_label.type_as(x)
cls_score = self.fc(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
def simple_test(self, x, sigmoid=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
sigmoid (bool): Whether to sigmoid the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if sigmoid:
pred = torch.sigmoid(cls_score) if cls_score is not None else None
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
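# Usage sketch (illustrative, assuming mmcls 0.24.x): multi-label heads score
# each class independently with a sigmoid, and the ground truth is a
# multi-hot (N, num_classes) tensor.
import torch
from mmcls.models import build_head

head = build_head(dict(type='MultiLabelLinearClsHead', num_classes=20,
                       in_channels=512))
feat = torch.rand(2, 512)
gt = torch.randint(0, 2, (2, 20))
losses = head.forward_train(feat, gt)
probs = head.simple_test(feat, sigmoid=True, post_process=False)  # (2, 20)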
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Sequence
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import build_activation_layer, build_norm_layer
from mmcv.runner import BaseModule, ModuleList
from ..builder import HEADS
from .cls_head import ClsHead
class LinearBlock(BaseModule):
def __init__(self,
in_channels,
out_channels,
dropout_rate=0.,
norm_cfg=None,
act_cfg=None,
init_cfg=None):
super().__init__(init_cfg=init_cfg)
self.fc = nn.Linear(in_channels, out_channels)
self.norm = None
self.act = None
self.dropout = None
if norm_cfg is not None:
self.norm = build_norm_layer(norm_cfg, out_channels)[1]
if act_cfg is not None:
self.act = build_activation_layer(act_cfg)
if dropout_rate > 0:
self.dropout = nn.Dropout(p=dropout_rate)
def forward(self, x):
x = self.fc(x)
if self.norm is not None:
x = self.norm(x)
if self.act is not None:
x = self.act(x)
if self.dropout is not None:
x = self.dropout(x)
return x
@HEADS.register_module()
class StackedLinearClsHead(ClsHead):
"""Classifier head with several hidden fc layer and a output fc layer.
Args:
num_classes (int): Number of categories.
in_channels (int): Number of channels in the input feature map.
mid_channels (Sequence): Number of channels in the hidden fc layers.
dropout_rate (float): Dropout rate after each hidden fc layer,
except the last layer. Defaults to 0.
norm_cfg (dict, optional): Config dict of normalization layer after
each hidden fc layer, except the last layer. Defaults to None.
act_cfg (dict, optional): Config dict of activation function after each
hidden layer, except the last layer. Defaults to use "ReLU".
"""
def __init__(self,
num_classes: int,
in_channels: int,
mid_channels: Sequence,
dropout_rate: float = 0.,
norm_cfg: Dict = None,
act_cfg: Dict = dict(type='ReLU'),
**kwargs):
super(StackedLinearClsHead, self).__init__(**kwargs)
assert num_classes > 0, \
f'`num_classes` of StackedLinearClsHead must be a positive ' \
f'integer, got {num_classes} instead.'
self.num_classes = num_classes
self.in_channels = in_channels
assert isinstance(mid_channels, Sequence), \
f'`mid_channels` of StackedLinearClsHead should be a sequence, ' \
f'instead of {type(mid_channels)}'
self.mid_channels = mid_channels
self.dropout_rate = dropout_rate
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self._init_layers()
def _init_layers(self):
self.layers = ModuleList()
in_channels = self.in_channels
for hidden_channels in self.mid_channels:
self.layers.append(
LinearBlock(
in_channels,
hidden_channels,
dropout_rate=self.dropout_rate,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg))
in_channels = hidden_channels
self.layers.append(
LinearBlock(
self.mid_channels[-1],
self.num_classes,
dropout_rate=0.,
norm_cfg=None,
act_cfg=None))
def init_weights(self):
self.layers.init_weights()
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
for layer in self.layers[:-1]:
x = layer(x)
return x
@property
def fc(self):
return self.layers[-1]
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.fc(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
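# Usage sketch (illustrative, assuming mmcls 0.24.x): StackedLinearClsHead
# inserts hidden fc layers (here one 256-wide layer with BN and ReLU) before
# the final classification layer.
import torch
from mmcls.models import build_head

head = build_head(dict(type='StackedLinearClsHead', num_classes=10,
                       in_channels=512, mid_channels=[256],
                       dropout_rate=0.2, norm_cfg=dict(type='BN1d'),
                       act_cfg=dict(type='ReLU')))
feat = torch.rand(4, 512)
losses = head.forward_train(feat, torch.randint(0, 10, (4, )))
scores = head.simple_test(feat, post_process=False)   # (4, 10) after softmax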
# Copyright (c) OpenMMLab. All rights reserved.
import math
from collections import OrderedDict
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import build_activation_layer
from mmcv.cnn.utils.weight_init import trunc_normal_
from mmcv.runner import Sequential
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class VisionTransformerClsHead(ClsHead):
"""Vision Transformer classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
hidden_dim (int): Number of the dimensions for hidden layer.
Defaults to None, which means no extra hidden layer.
act_cfg (dict): The activation config. Only available during
pre-training. Defaults to ``dict(type='Tanh')``.
init_cfg (dict): The extra initialization configs. Defaults to
``dict(type='Constant', layer='Linear', val=0)``.
"""
def __init__(self,
num_classes,
in_channels,
hidden_dim=None,
act_cfg=dict(type='Tanh'),
init_cfg=dict(type='Constant', layer='Linear', val=0),
*args,
**kwargs):
super(VisionTransformerClsHead, self).__init__(
init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.hidden_dim = hidden_dim
self.act_cfg = act_cfg
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self._init_layers()
def _init_layers(self):
if self.hidden_dim is None:
layers = [('head', nn.Linear(self.in_channels, self.num_classes))]
else:
layers = [
('pre_logits', nn.Linear(self.in_channels, self.hidden_dim)),
('act', build_activation_layer(self.act_cfg)),
('head', nn.Linear(self.hidden_dim, self.num_classes)),
]
self.layers = Sequential(OrderedDict(layers))
def init_weights(self):
super(VisionTransformerClsHead, self).init_weights()
# Modified from ClassyVision
if hasattr(self.layers, 'pre_logits'):
# Lecun norm
trunc_normal_(
self.layers.pre_logits.weight,
std=math.sqrt(1 / self.layers.pre_logits.in_features))
nn.init.zeros_(self.layers.pre_logits.bias)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
_, cls_token = x
if self.hidden_dim is None:
return cls_token
else:
x = self.layers.pre_logits(cls_token)
return self.layers.act(x)
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[tuple[tensor, tensor]]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. Every item should be a tuple which
includes patch token and cls token. The cls token will be used
to classify and the shape of it should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing on the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
- If post processing, the output is a multi-dimensional list of
floats and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.layers.head(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.layers.head(x)
losses = self.loss(cls_score, gt_label, **kwargs)
return losses
# Copyright (c) OpenMMLab. All rights reserved.
from .accuracy import Accuracy, accuracy
from .asymmetric_loss import AsymmetricLoss, asymmetric_loss
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
cross_entropy)
from .focal_loss import FocalLoss, sigmoid_focal_loss
from .label_smooth_loss import LabelSmoothLoss
from .seesaw_loss import SeesawLoss
from .utils import (convert_to_one_hot, reduce_loss, weight_reduce_loss,
weighted_loss)
__all__ = [
'accuracy', 'Accuracy', 'asymmetric_loss', 'AsymmetricLoss',
'cross_entropy', 'binary_cross_entropy', 'CrossEntropyLoss', 'reduce_loss',
'weight_reduce_loss', 'LabelSmoothLoss', 'weighted_loss', 'FocalLoss',
'sigmoid_focal_loss', 'convert_to_one_hot', 'SeesawLoss'
]
# Copyright (c) OpenMMLab. All rights reserved.
from numbers import Number
import numpy as np
import torch
import torch.nn as nn
def accuracy_numpy(pred, target, topk=(1, ), thrs=0.):
if isinstance(thrs, Number):
thrs = (thrs, )
res_single = True
elif isinstance(thrs, tuple):
res_single = False
else:
raise TypeError(
f'thrs should be a number or tuple, but got {type(thrs)}.')
res = []
maxk = max(topk)
num = pred.shape[0]
static_inds = np.indices((num, maxk))[0]
pred_label = pred.argpartition(-maxk, axis=1)[:, -maxk:]
pred_score = pred[static_inds, pred_label]
sort_inds = np.argsort(pred_score, axis=1)[:, ::-1]
pred_label = pred_label[static_inds, sort_inds]
pred_score = pred_score[static_inds, sort_inds]
for k in topk:
correct_k = pred_label[:, :k] == target.reshape(-1, 1)
res_thr = []
for thr in thrs:
# Only prediction values larger than thr are counted as correct
_correct_k = correct_k & (pred_score[:, :k] > thr)
_correct_k = np.logical_or.reduce(_correct_k, axis=1)
res_thr.append((_correct_k.sum() * 100. / num))
if res_single:
res.append(res_thr[0])
else:
res.append(res_thr)
return res
def accuracy_torch(pred, target, topk=(1, ), thrs=0.):
if isinstance(thrs, Number):
thrs = (thrs, )
res_single = True
elif isinstance(thrs, tuple):
res_single = False
else:
raise TypeError(
f'thrs should be a number or tuple, but got {type(thrs)}.')
res = []
maxk = max(topk)
num = pred.size(0)
pred = pred.float()
pred_score, pred_label = pred.topk(maxk, dim=1)
pred_label = pred_label.t()
correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
for k in topk:
res_thr = []
for thr in thrs:
# Only prediction values larger than thr are counted as correct
_correct = correct & (pred_score.t() > thr)
correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
res_thr.append((correct_k.mul_(100. / num)))
if res_single:
res.append(res_thr[0])
else:
res.append(res_thr)
return res
def accuracy(pred, target, topk=1, thrs=0.):
"""Calculate accuracy according to the prediction and target.
Args:
pred (torch.Tensor | np.array): The model prediction.
target (torch.Tensor | np.array): The target of each prediction
topk (int | tuple[int]): If the predictions in ``topk``
matches the target, the predictions will be regarded as
correct ones. Defaults to 1.
thrs (Number | tuple[Number], optional): Predictions with scores under
the thresholds are considered negative. Default to 0.
Returns:
torch.Tensor | list[torch.Tensor] | list[list[torch.Tensor]]: Accuracy
- torch.Tensor: If both ``topk`` and ``thrs`` is a single value.
- list[torch.Tensor]: If one of ``topk`` or ``thrs`` is a tuple.
- list[list[torch.Tensor]]: If both ``topk`` and ``thrs`` is a \
tuple. And the first dim is ``topk``, the second dim is ``thrs``.
"""
assert isinstance(topk, (int, tuple))
if isinstance(topk, int):
topk = (topk, )
return_single = True
else:
return_single = False
assert isinstance(pred, (torch.Tensor, np.ndarray)), \
f'The pred should be torch.Tensor or np.ndarray ' \
f'instead of {type(pred)}.'
assert isinstance(target, (torch.Tensor, np.ndarray)), \
f'The target should be torch.Tensor or np.ndarray ' \
f'instead of {type(target)}.'
# torch version is faster in most situations.
to_tensor = (lambda x: torch.from_numpy(x)
if isinstance(x, np.ndarray) else x)
pred = to_tensor(pred)
target = to_tensor(target)
res = accuracy_torch(pred, target, topk, thrs)
return res[0] if return_single else res
class Accuracy(nn.Module):
def __init__(self, topk=(1, )):
"""Module to calculate the accuracy.
Args:
topk (tuple): The criterion used to calculate the
accuracy. Defaults to (1,).
"""
super().__init__()
self.topk = topk
def forward(self, pred, target):
"""Forward function to calculate accuracy.
Args:
pred (torch.Tensor): Prediction of models.
target (torch.Tensor): Target for each prediction.
Returns:
list[torch.Tensor]: The accuracies under different topk criteria.
"""
return accuracy(pred, target, self.topk)
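# Quick check of the helpers above (illustrative, assuming mmcls 0.24.x):
# four samples over three classes, all predicted correctly at top-1.
import torch
from mmcls.models.losses import Accuracy, accuracy

pred = torch.tensor([[0.8, 0.1, 0.1],
                     [0.1, 0.8, 0.1],
                     [0.1, 0.1, 0.8],
                     [0.7, 0.2, 0.1]])
target = torch.tensor([0, 1, 2, 0])
print(accuracy(pred, target, topk=(1, 2)))   # [tensor([100.]), tensor([100.])]
print(Accuracy(topk=1)(pred, target))        # tensor([100.])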
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from ..builder import LOSSES
from .utils import convert_to_one_hot, weight_reduce_loss
def asymmetric_loss(pred,
target,
weight=None,
gamma_pos=1.0,
gamma_neg=4.0,
clip=0.05,
reduction='mean',
avg_factor=None,
use_sigmoid=True,
eps=1e-8):
r"""asymmetric loss.
Please refer to the `paper <https://arxiv.org/abs/2009.14119>`__ for
details.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction with
shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, ). Defaults to None.
gamma_pos (float): Positive focusing parameter. Defaults to 1.0.
gamma_neg (float): Negative focusing parameter. We usually set
gamma_neg > gamma_pos. Defaults to 4.0.
clip (float, optional): Probability margin. Defaults to 0.05.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". If reduction is 'none' , loss
is same shape as pred and label. Defaults to 'mean'.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
use_sigmoid (bool): Whether the prediction uses sigmoid instead
of softmax. Defaults to True.
eps (float): The minimum value of the argument of logarithm. Defaults
to 1e-8.
Returns:
torch.Tensor: Loss.
"""
assert pred.shape == \
target.shape, 'pred and target should be in the same shape.'
if use_sigmoid:
pred_sigmoid = pred.sigmoid()
else:
pred_sigmoid = nn.functional.softmax(pred, dim=-1)
target = target.type_as(pred)
if clip and clip > 0:
pt = (1 - pred_sigmoid +
clip).clamp(max=1) * (1 - target) + pred_sigmoid * target
else:
pt = (1 - pred_sigmoid) * (1 - target) + pred_sigmoid * target
asymmetric_weight = (1 - pt).pow(gamma_pos * target + gamma_neg *
(1 - target))
loss = -torch.log(pt.clamp(min=eps)) * asymmetric_weight
if weight is not None:
assert weight.dim() == 1
weight = weight.float()
if pred.dim() > 1:
weight = weight.reshape(-1, 1)
loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
return loss
@LOSSES.register_module()
class AsymmetricLoss(nn.Module):
"""asymmetric loss.
Args:
gamma_pos (float): positive focusing parameter.
Defaults to 0.0.
gamma_neg (float): Negative focusing parameter. We
usually set gamma_neg > gamma_pos. Defaults to 4.0.
clip (float, optional): Probability margin. Defaults to 0.05.
reduction (str): The method used to reduce the loss into
a scalar.
loss_weight (float): Weight of loss. Defaults to 1.0.
use_sigmoid (bool): Whether the prediction uses sigmoid instead
of softmax. Defaults to True.
eps (float): The minimum value of the argument of logarithm. Defaults
to 1e-8.
"""
def __init__(self,
gamma_pos=0.0,
gamma_neg=4.0,
clip=0.05,
reduction='mean',
loss_weight=1.0,
use_sigmoid=True,
eps=1e-8):
super(AsymmetricLoss, self).__init__()
self.gamma_pos = gamma_pos
self.gamma_neg = gamma_neg
self.clip = clip
self.reduction = reduction
self.loss_weight = loss_weight
self.use_sigmoid = use_sigmoid
self.eps = eps
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
r"""asymmetric loss.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction
with shape (N, \*), N or (N,1).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
Returns:
torch.Tensor: Loss.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if target.dim() == 1 or (target.dim() == 2 and target.shape[1] == 1):
target = convert_to_one_hot(target.view(-1, 1), pred.shape[-1])
loss_cls = self.loss_weight * asymmetric_loss(
pred,
target,
weight,
gamma_pos=self.gamma_pos,
gamma_neg=self.gamma_neg,
clip=self.clip,
reduction=reduction,
avg_factor=avg_factor,
use_sigmoid=self.use_sigmoid,
eps=self.eps)
return loss_cls
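# Usage sketch (illustrative, assuming mmcls 0.24.x): AsymmetricLoss is meant
# for multi-label targets, so pred (logits) and target (multi-hot) are both
# (N, C); hard integer labels are also accepted and converted to one-hot.
import torch
from mmcls.models import build_loss

criterion = build_loss(dict(type='AsymmetricLoss', gamma_pos=0.0,
                            gamma_neg=4.0, clip=0.05))
pred = torch.randn(4, 5)
target = torch.randint(0, 2, (4, 5)).float()
print(criterion(pred, target))                       # scalar mean loss
print(criterion(pred, torch.tensor([0, 1, 2, 3])))   # hard labels -> one-hot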
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def cross_entropy(pred,
label,
weight=None,
reduction='mean',
avg_factor=None,
class_weight=None):
"""Calculate the CrossEntropy loss.
Args:
pred (torch.Tensor): The prediction with shape (N, C), C is the number
of classes.
label (torch.Tensor): The gt label of the prediction.
weight (torch.Tensor, optional): Sample-wise loss weight.
reduction (str): The method used to reduce the loss.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
class_weight (torch.Tensor, optional): The weight for each class with
shape (C), C is the number of classes. Default None.
Returns:
torch.Tensor: The calculated loss
"""
# element-wise losses
loss = F.cross_entropy(pred, label, weight=class_weight, reduction='none')
# apply weights and do the reduction
if weight is not None:
weight = weight.float()
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
def soft_cross_entropy(pred,
label,
weight=None,
reduction='mean',
class_weight=None,
avg_factor=None):
"""Calculate the Soft CrossEntropy loss. The label can be float.
Args:
pred (torch.Tensor): The prediction with shape (N, C), C is the number
of classes.
label (torch.Tensor): The gt label of the prediction with shape (N, C).
When using "mixup", the label can be float.
weight (torch.Tensor, optional): Sample-wise loss weight.
reduction (str): The method used to reduce the loss.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
class_weight (torch.Tensor, optional): The weight for each class with
shape (C), C is the number of classes. Default None.
Returns:
torch.Tensor: The calculated loss
"""
# element-wise losses
loss = -label * F.log_softmax(pred, dim=-1)
if class_weight is not None:
loss *= class_weight
loss = loss.sum(dim=-1)
# apply weights and do the reduction
if weight is not None:
weight = weight.float()
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
def binary_cross_entropy(pred,
label,
weight=None,
reduction='mean',
avg_factor=None,
class_weight=None,
pos_weight=None):
r"""Calculate the binary CrossEntropy loss with logits.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
label (torch.Tensor): The gt label with shape (N, \*).
weight (torch.Tensor, optional): Element-wise weight of loss with shape
(N, ). Defaults to None.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". If reduction is 'none' , loss
is same shape as pred and label. Defaults to 'mean'.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
class_weight (torch.Tensor, optional): The weight for each class with
shape (C), C is the number of classes. Default None.
pos_weight (torch.Tensor, optional): The positive weight for each
class with shape (C), C is the number of classes. Default None.
Returns:
torch.Tensor: The calculated loss
"""
# Ensure that the size of class_weight is consistent with pred and label to
# avoid automatic broadcasting.
assert pred.dim() == label.dim()
if class_weight is not None:
N = pred.size()[0]
class_weight = class_weight.repeat(N, 1)
loss = F.binary_cross_entropy_with_logits(
pred,
label,
weight=class_weight,
pos_weight=pos_weight,
reduction='none')
# apply weights and do the reduction
if weight is not None:
assert weight.dim() == 1
weight = weight.float()
if pred.dim() > 1:
weight = weight.reshape(-1, 1)
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
@LOSSES.register_module()
class CrossEntropyLoss(nn.Module):
"""Cross entropy loss.
Args:
use_sigmoid (bool): Whether the prediction uses sigmoid
instead of softmax. Defaults to False.
use_soft (bool): Whether to use the soft version of CrossEntropyLoss.
Defaults to False.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". Defaults to 'mean'.
loss_weight (float): Weight of the loss. Defaults to 1.0.
class_weight (List[float], optional): The weight for each class with
shape (C), C is the number of classes. Default None.
pos_weight (List[float], optional): The positive weight for each
class with shape (C), C is the number of classes. Only enabled in
BCE loss when ``use_sigmoid`` is True. Default None.
"""
def __init__(self,
use_sigmoid=False,
use_soft=False,
reduction='mean',
loss_weight=1.0,
class_weight=None,
pos_weight=None):
super(CrossEntropyLoss, self).__init__()
self.use_sigmoid = use_sigmoid
self.use_soft = use_soft
assert not (
self.use_soft and self.use_sigmoid
), 'use_sigmoid and use_soft could not be set simultaneously'
self.reduction = reduction
self.loss_weight = loss_weight
self.class_weight = class_weight
self.pos_weight = pos_weight
if self.use_sigmoid:
self.cls_criterion = binary_cross_entropy
elif self.use_soft:
self.cls_criterion = soft_cross_entropy
else:
self.cls_criterion = cross_entropy
def forward(self,
cls_score,
label,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if self.class_weight is not None:
class_weight = cls_score.new_tensor(self.class_weight)
else:
class_weight = None
# only BCE loss has pos_weight
if self.pos_weight is not None and self.use_sigmoid:
pos_weight = cls_score.new_tensor(self.pos_weight)
kwargs.update({'pos_weight': pos_weight})
else:
pos_weight = None
loss_cls = self.loss_weight * self.cls_criterion(
cls_score,
label,
weight,
class_weight=class_weight,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss_cls
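# Usage sketch (illustrative, assuming mmcls 0.24.x) of the three criterion
# variants selected by ``use_sigmoid`` / ``use_soft``: plain CE with hard
# labels, BCE-with-logits for multi-hot targets, and soft CE for float
# (e.g. mixup) targets.
import torch
import torch.nn.functional as F
from mmcls.models import build_loss

logits = torch.randn(4, 10)
hard = torch.randint(0, 10, (4, ))

ce = build_loss(dict(type='CrossEntropyLoss', loss_weight=1.0))
print(ce(logits, hard))                                # softmax cross entropy

bce = build_loss(dict(type='CrossEntropyLoss', use_sigmoid=True))
print(bce(logits, F.one_hot(hard, 10).float()))        # sigmoid BCE

soft = build_loss(dict(type='CrossEntropyLoss', use_soft=True))
soft_label = 0.7 * F.one_hot(hard, 10).float() + 0.03  # mixup-like target
print(soft(logits, soft_label))                        # soft cross entropy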
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
from .utils import convert_to_one_hot, weight_reduce_loss
def sigmoid_focal_loss(pred,
......@@ -12,14 +13,14 @@ def sigmoid_focal_loss(pred,
alpha=0.25,
reduction='mean',
avg_factor=None):
"""Sigmoid focal loss.
r"""Sigmoid focal loss.
Args:
pred (torch.Tensor): The prediction with shape (N, *).
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction with
shape (N, *).
shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, ). Dafaults to None.
(N, ). Defaults to None.
gamma (float): The gamma for calculating the modulating factor.
Defaults to 2.0.
alpha (float): A balanced form for Focal Loss. Defaults to 0.25.
......@@ -82,16 +83,16 @@ class FocalLoss(nn.Module):
weight=None,
avg_factor=None,
reduction_override=None):
"""Sigmoid focal loss.
r"""Sigmoid focal loss.
Args:
pred (torch.Tensor): The prediction with shape (N, *).
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction
with shape (N, *).
with shape (N, \*), N or (N,1).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, *). Dafaults to None.
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
......@@ -102,6 +103,8 @@ class FocalLoss(nn.Module):
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if target.dim() == 1 or (target.dim() == 2 and target.shape[1] == 1):
target = convert_to_one_hot(target.view(-1, 1), pred.shape[-1])
loss_cls = self.loss_weight * sigmoid_focal_loss(
pred,
target,
......
import warnings
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
......@@ -10,9 +9,10 @@ from .utils import convert_to_one_hot
@LOSSES.register_module()
class LabelSmoothLoss(nn.Module):
r"""Intializer for the label smoothed cross entropy loss.
Refers to `Rethinking the Inception Architecture for Computer Vision` -
https://arxiv.org/abs/1512.00567
r"""Initializer for the label smoothed cross entropy loss.
Refers to `Rethinking the Inception Architecture for Computer Vision
<https://arxiv.org/abs/1512.00567>`_
This decreases gap between output scores and encourages generalization.
Labels provided to forward can be one-hot like vectors (NxC) or class
......@@ -24,7 +24,7 @@ class LabelSmoothLoss(nn.Module):
label_smooth_val (float): The degree of label smoothing.
num_classes (int, optional): Number of classes. Defaults to None.
mode (str): Refers to notes, Options are 'original', 'classy_vision',
'multi_label'. Defaults to 'classy_vision'
'multi_label'. Defaults to 'original'
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". Defaults to 'mean'.
loss_weight (float): Weight of the loss. Defaults to 1.0.
......@@ -34,7 +34,7 @@ class LabelSmoothLoss(nn.Module):
as the original paper as:
.. math::
(1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K}
(1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K}
where epsilon is the `label_smooth_val`, K is the num_classes and
delta(k,y) is Dirac delta, which equals 1 for k=y and 0 otherwise.
......@@ -43,19 +43,19 @@ class LabelSmoothLoss(nn.Module):
method as the facebookresearch/ClassyVision repo as:
.. math::
\frac{\delta_{k, y} + \epsilon/K}{1+\epsilon}
\frac{\delta_{k, y} + \epsilon/K}{1+\epsilon}
if the mode is "multi_label", this will accept labels from multi-label
task and smoothing them as:
.. math::
(1-2\epsilon)\delta_{k, y} + \epsilon
(1-2\epsilon)\delta_{k, y} + \epsilon
"""
def __init__(self,
label_smooth_val,
num_classes=None,
mode=None,
mode='original',
reduction='mean',
loss_weight=1.0):
super().__init__()
......@@ -74,14 +74,6 @@ class LabelSmoothLoss(nn.Module):
f'but gets {mode}.'
self.reduction = reduction
if mode is None:
warnings.warn(
'LabelSmoothLoss mode is not set, use "classy_vision" '
'by default. The default value will be changed to '
'"original" recently. Please set mode manually if want '
'to keep "classy_vision".', UserWarning)
mode = 'classy_vision'
accept_mode = {'original', 'classy_vision', 'multi_label'}
assert mode in accept_mode, \
f'LabelSmoothLoss supports mode {accept_mode}, but gets {mode}.'
......@@ -124,6 +116,23 @@ class LabelSmoothLoss(nn.Module):
avg_factor=None,
reduction_override=None,
**kwargs):
r"""Label smooth loss.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
label (torch.Tensor): The ground truth label of the prediction
with shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
Returns:
torch.Tensor: Loss.
"""
if self.num_classes is not None:
assert self.num_classes == cls_score.shape[1], \
f'num_classes should equal to cls_score.shape[1], ' \
......
# Copyright (c) OpenMMLab. All rights reserved.
# migrate from mmdetection with modifications
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def seesaw_ce_loss(cls_score,
labels,
weight,
cum_samples,
num_classes,
p,
q,
eps,
reduction='mean',
avg_factor=None):
"""Calculate the Seesaw CrossEntropy loss.
Args:
cls_score (torch.Tensor): The prediction with shape (N, C),
C is the number of classes.
labels (torch.Tensor): The learning label of the prediction.
weight (torch.Tensor): Sample-wise loss weight.
cum_samples (torch.Tensor): Cumulative samples for each category.
num_classes (int): The number of classes.
p (float): The ``p`` in the mitigation factor.
q (float): The ``q`` in the compensation factor.
eps (float): The minimal value of divisor to smooth
the computation of the compensation factor.
reduction (str, optional): The method used to reduce the loss.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
Returns:
torch.Tensor: The calculated loss
"""
assert cls_score.size(-1) == num_classes
assert len(cum_samples) == num_classes
onehot_labels = F.one_hot(labels, num_classes)
seesaw_weights = cls_score.new_ones(onehot_labels.size())
# mitigation factor
if p > 0:
sample_ratio_matrix = cum_samples[None, :].clamp(
min=1) / cum_samples[:, None].clamp(min=1)
index = (sample_ratio_matrix < 1.0).float()
sample_weights = sample_ratio_matrix.pow(p) * index + (1 - index
) # M_{ij}
mitigation_factor = sample_weights[labels.long(), :]
seesaw_weights = seesaw_weights * mitigation_factor
# compensation factor
if q > 0:
scores = F.softmax(cls_score.detach(), dim=1)
self_scores = scores[
torch.arange(0, len(scores)).to(scores.device).long(),
labels.long()]
score_matrix = scores / self_scores[:, None].clamp(min=eps)
index = (score_matrix > 1.0).float()
compensation_factor = score_matrix.pow(q) * index + (1 - index)
seesaw_weights = seesaw_weights * compensation_factor
cls_score = cls_score + (seesaw_weights.log() * (1 - onehot_labels))
loss = F.cross_entropy(cls_score, labels, weight=None, reduction='none')
if weight is not None:
weight = weight.float()
loss = weight_reduce_loss(
loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
return loss
@LOSSES.register_module()
class SeesawLoss(nn.Module):
"""Implementation of seesaw loss.
Refers to `Seesaw Loss for Long-Tailed Instance Segmentation (CVPR 2021)
<https://arxiv.org/abs/2008.10032>`_
Args:
use_sigmoid (bool): Whether the prediction uses sigmoid instead of
softmax. Only False is supported. Defaults to False.
p (float): The ``p`` in the mitigation factor.
Defaults to 0.8.
q (float): The ``q`` in the compensation factor.
Defaults to 2.0.
num_classes (int): The number of classes.
Default to 1000 for the ImageNet dataset.
eps (float): The minimal value of divisor to smooth
the computation of compensation factor, default to 1e-2.
reduction (str): The method that reduces the loss to a scalar.
Options are "none", "mean" and "sum". Default to "mean".
loss_weight (float): The weight of the loss. Defaults to 1.0
"""
def __init__(self,
use_sigmoid=False,
p=0.8,
q=2.0,
num_classes=1000,
eps=1e-2,
reduction='mean',
loss_weight=1.0):
super(SeesawLoss, self).__init__()
assert not use_sigmoid, '`use_sigmoid` is not supported'
self.use_sigmoid = False
self.p = p
self.q = q
self.num_classes = num_classes
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
self.cls_criterion = seesaw_ce_loss
# cumulative samples for each category
self.register_buffer('cum_samples',
torch.zeros(self.num_classes, dtype=torch.float))
def forward(self,
cls_score,
labels,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
cls_score (torch.Tensor): The prediction with shape (N, C).
labels (torch.Tensor): The learning label of the prediction.
weight (torch.Tensor, optional): Sample-wise loss weight.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss. Options are "none", "mean" and "sum". Defaults to None.
Returns:
torch.Tensor: The calculated loss
"""
assert reduction_override in (None, 'none', 'mean', 'sum'), \
f'The `reduction_override` should be one of (None, "none", ' \
f'"mean", "sum"), but get "{reduction_override}".'
assert cls_score.size(0) == labels.view(-1).size(0), \
f'Expected `labels` shape [{cls_score.size(0)}], ' \
f'but got {list(labels.size())}'
reduction = (
reduction_override if reduction_override else self.reduction)
assert cls_score.size(-1) == self.num_classes, \
f'The channel number of output ({cls_score.size(-1)}) does ' \
f'not match the `num_classes` of seesaw loss ({self.num_classes}).'
# accumulate the samples for each category
unique_labels = labels.unique()
for u_l in unique_labels:
inds_ = labels == u_l.item()
self.cum_samples[u_l] += inds_.sum()
if weight is not None:
weight = weight.float()
else:
weight = labels.new_ones(labels.size(), dtype=torch.float)
# calculate loss_cls_classes
loss_cls = self.loss_weight * self.cls_criterion(
cls_score, labels, weight, self.cum_samples, self.num_classes,
self.p, self.q, self.eps, reduction, avg_factor)
return loss_cls
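# Usage sketch (illustrative, assuming mmcls 0.24.x): SeesawLoss keeps a
# running per-class sample count in the ``cum_samples`` buffer and uses it to
# down-weight negative gradients for rare classes; ``num_classes`` must match
# the logit width.
import torch
from mmcls.models import build_loss

criterion = build_loss(dict(type='SeesawLoss', p=0.8, q=2.0, num_classes=5))
logits = torch.randn(8, 5)
labels = torch.randint(0, 5, (8, ))
print(criterion(logits, labels))   # scalar loss
print(criterion.cum_samples)       # per-class counts accumulated so far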