Commit a4372e17 authored by huchen's avatar huchen
Browse files

Merge branch 'hepj_work' into 'main'

增加conformer代码

See merge request dcutoolkit/deeplearing/dlexamples_new!7
parents 7f99c1c3 142dcf29
import torch.nn as nn
from ..builder import LOSSES
from .utils import weighted_loss
@weighted_loss
def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0):
"""`Focal Loss <https://arxiv.org/abs/1708.02002>`_ for targets in gaussian
distribution.
Args:
pred (torch.Tensor): The prediction.
gaussian_target (torch.Tensor): The learning target of the prediction
in gaussian distribution.
alpha (float, optional): A balanced form for Focal Loss.
Defaults to 2.0.
gamma (float, optional): The gamma for calculating the modulating
factor. Defaults to 4.0.
"""
eps = 1e-12
pos_weights = gaussian_target.eq(1)
neg_weights = (1 - gaussian_target).pow(gamma)
pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights
neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights
return pos_loss + neg_loss
@LOSSES.register_module()
class GaussianFocalLoss(nn.Module):
"""GaussianFocalLoss is a variant of focal loss.
More details can be found in the `paper
<https://arxiv.org/abs/1808.01244>`_
Code is modified from `kp_utils.py
<https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py#L152>`_ # noqa: E501
Please notice that the target in GaussianFocalLoss is a gaussian heatmap,
not 0/1 binary target.
Args:
alpha (float): Power of prediction.
gamma (float): Power of target for negtive samples.
reduction (str): Options are "none", "mean" and "sum".
loss_weight (float): Loss weight of current loss.
"""
def __init__(self,
alpha=2.0,
gamma=4.0,
reduction='mean',
loss_weight=1.0):
super(GaussianFocalLoss, self).__init__()
self.alpha = alpha
self.gamma = gamma
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction
in gaussian distribution.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_reg = self.loss_weight * gaussian_focal_loss(
pred,
target,
weight,
alpha=self.alpha,
gamma=self.gamma,
reduction=reduction,
avg_factor=avg_factor)
return loss_reg
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weighted_loss
@weighted_loss
def quality_focal_loss(pred, target, beta=2.0):
r"""Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning
Qualified and Distributed Bounding Boxes for Dense Object Detection
<https://arxiv.org/abs/2006.04388>`_.
Args:
pred (torch.Tensor): Predicted joint representation of classification
and quality (IoU) estimation with shape (N, C), C is the number of
classes.
target (tuple([torch.Tensor])): Target category label with shape (N,)
and target quality label with shape (N,).
beta (float): The beta parameter for calculating the modulating factor.
Defaults to 2.0.
Returns:
torch.Tensor: Loss tensor with shape (N,).
"""
assert len(target) == 2, """target for QFL must be a tuple of two elements,
including category label and quality label, respectively"""
# label denotes the category id, score denotes the quality score
label, score = target
# negatives are supervised by 0 quality score
pred_sigmoid = pred.sigmoid()
scale_factor = pred_sigmoid
zerolabel = scale_factor.new_zeros(pred.shape)
loss = F.binary_cross_entropy_with_logits(
pred, zerolabel, reduction='none') * scale_factor.pow(beta)
# FG cat_id: [0, num_classes -1], BG cat_id: num_classes
bg_class_ind = pred.size(1)
pos = ((label >= 0) & (label < bg_class_ind)).nonzero().squeeze(1)
pos_label = label[pos].long()
# positives are supervised by bbox quality (IoU) score
scale_factor = score[pos] - pred_sigmoid[pos, pos_label]
loss[pos, pos_label] = F.binary_cross_entropy_with_logits(
pred[pos, pos_label], score[pos],
reduction='none') * scale_factor.abs().pow(beta)
loss = loss.sum(dim=1, keepdim=False)
return loss
@weighted_loss
def distribution_focal_loss(pred, label):
r"""Distribution Focal Loss (DFL) is from `Generalized Focal Loss: Learning
Qualified and Distributed Bounding Boxes for Dense Object Detection
<https://arxiv.org/abs/2006.04388>`_.
Args:
pred (torch.Tensor): Predicted general distribution of bounding boxes
(before softmax) with shape (N, n+1), n is the max value of the
integral set `{0, ..., n}` in paper.
label (torch.Tensor): Target distance label for bounding boxes with
shape (N,).
Returns:
torch.Tensor: Loss tensor with shape (N,).
"""
dis_left = label.long()
dis_right = dis_left + 1
weight_left = dis_right.float() - label
weight_right = label - dis_left.float()
loss = F.cross_entropy(pred, dis_left, reduction='none') * weight_left \
+ F.cross_entropy(pred, dis_right, reduction='none') * weight_right
return loss
@LOSSES.register_module()
class QualityFocalLoss(nn.Module):
r"""Quality Focal Loss (QFL) is a variant of `Generalized Focal Loss:
Learning Qualified and Distributed Bounding Boxes for Dense Object
Detection <https://arxiv.org/abs/2006.04388>`_.
Args:
use_sigmoid (bool): Whether sigmoid operation is conducted in QFL.
Defaults to True.
beta (float): The beta parameter for calculating the modulating factor.
Defaults to 2.0.
reduction (str): Options are "none", "mean" and "sum".
loss_weight (float): Loss weight of current loss.
"""
def __init__(self,
use_sigmoid=True,
beta=2.0,
reduction='mean',
loss_weight=1.0):
super(QualityFocalLoss, self).__init__()
assert use_sigmoid is True, 'Only sigmoid in QFL supported now.'
self.use_sigmoid = use_sigmoid
self.beta = beta
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): Predicted joint representation of
classification and quality (IoU) estimation with shape (N, C),
C is the number of classes.
target (tuple([torch.Tensor])): Target category label with shape
(N,) and target quality label with shape (N,).
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if self.use_sigmoid:
loss_cls = self.loss_weight * quality_focal_loss(
pred,
target,
weight,
beta=self.beta,
reduction=reduction,
avg_factor=avg_factor)
else:
raise NotImplementedError
return loss_cls
@LOSSES.register_module()
class DistributionFocalLoss(nn.Module):
r"""Distribution Focal Loss (DFL) is a variant of `Generalized Focal Loss:
Learning Qualified and Distributed Bounding Boxes for Dense Object
Detection <https://arxiv.org/abs/2006.04388>`_.
Args:
reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
loss_weight (float): Loss weight of current loss.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
super(DistributionFocalLoss, self).__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): Predicted general distribution of bounding
boxes (before softmax) with shape (N, n+1), n is the max value
of the integral set `{0, ..., n}` in paper.
target (torch.Tensor): Target distance label for bounding boxes
with shape (N,).
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_cls = self.loss_weight * distribution_focal_loss(
pred, target, weight, reduction=reduction, avg_factor=avg_factor)
return loss_cls
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
def _expand_onehot_labels(labels, label_weights, label_channels):
bin_labels = labels.new_full((labels.size(0), label_channels), 0)
inds = torch.nonzero(
(labels >= 0) & (labels < label_channels), as_tuple=False).squeeze()
if inds.numel() > 0:
bin_labels[inds, labels[inds]] = 1
bin_label_weights = label_weights.view(-1, 1).expand(
label_weights.size(0), label_channels)
return bin_labels, bin_label_weights
# TODO: code refactoring to make it consistent with other losses
@LOSSES.register_module()
class GHMC(nn.Module):
"""GHM Classification Loss.
Details of the theorem can be viewed in the paper
`Gradient Harmonized Single-stage Detector
<https://arxiv.org/abs/1811.05181>`_.
Args:
bins (int): Number of the unit regions for distribution calculation.
momentum (float): The parameter for moving average.
use_sigmoid (bool): Can only be true for BCE based loss now.
loss_weight (float): The weight of the total GHM-C loss.
"""
def __init__(self, bins=10, momentum=0, use_sigmoid=True, loss_weight=1.0):
super(GHMC, self).__init__()
self.bins = bins
self.momentum = momentum
edges = torch.arange(bins + 1).float() / bins
self.register_buffer('edges', edges)
self.edges[-1] += 1e-6
if momentum > 0:
acc_sum = torch.zeros(bins)
self.register_buffer('acc_sum', acc_sum)
self.use_sigmoid = use_sigmoid
if not self.use_sigmoid:
raise NotImplementedError
self.loss_weight = loss_weight
def forward(self, pred, target, label_weight, *args, **kwargs):
"""Calculate the GHM-C loss.
Args:
pred (float tensor of size [batch_num, class_num]):
The direct prediction of classification fc layer.
target (float tensor of size [batch_num, class_num]):
Binary class target for each sample.
label_weight (float tensor of size [batch_num, class_num]):
the value is 1 if the sample is valid and 0 if ignored.
Returns:
The gradient harmonized loss.
"""
# the target should be binary class label
if pred.dim() != target.dim():
target, label_weight = _expand_onehot_labels(
target, label_weight, pred.size(-1))
target, label_weight = target.float(), label_weight.float()
edges = self.edges
mmt = self.momentum
weights = torch.zeros_like(pred)
# gradient length
g = torch.abs(pred.sigmoid().detach() - target)
valid = label_weight > 0
tot = max(valid.float().sum().item(), 1.0)
n = 0 # n valid bins
for i in range(self.bins):
inds = (g >= edges[i]) & (g < edges[i + 1]) & valid
num_in_bin = inds.sum().item()
if num_in_bin > 0:
if mmt > 0:
self.acc_sum[i] = mmt * self.acc_sum[i] \
+ (1 - mmt) * num_in_bin
weights[inds] = tot / self.acc_sum[i]
else:
weights[inds] = tot / num_in_bin
n += 1
if n > 0:
weights = weights / n
loss = F.binary_cross_entropy_with_logits(
pred, target, weights, reduction='sum') / tot
return loss * self.loss_weight
# TODO: code refactoring to make it consistent with other losses
@LOSSES.register_module()
class GHMR(nn.Module):
"""GHM Regression Loss.
Details of the theorem can be viewed in the paper
`Gradient Harmonized Single-stage Detector
<https://arxiv.org/abs/1811.05181>`_.
Args:
mu (float): The parameter for the Authentic Smooth L1 loss.
bins (int): Number of the unit regions for distribution calculation.
momentum (float): The parameter for moving average.
loss_weight (float): The weight of the total GHM-R loss.
"""
def __init__(self, mu=0.02, bins=10, momentum=0, loss_weight=1.0):
super(GHMR, self).__init__()
self.mu = mu
self.bins = bins
edges = torch.arange(bins + 1).float() / bins
self.register_buffer('edges', edges)
self.edges[-1] = 1e3
self.momentum = momentum
if momentum > 0:
acc_sum = torch.zeros(bins)
self.register_buffer('acc_sum', acc_sum)
self.loss_weight = loss_weight
# TODO: support reduction parameter
def forward(self, pred, target, label_weight, avg_factor=None):
"""Calculate the GHM-R loss.
Args:
pred (float tensor of size [batch_num, 4 (* class_num)]):
The prediction of box regression layer. Channel number can be 4
or 4 * class_num depending on whether it is class-agnostic.
target (float tensor of size [batch_num, 4 (* class_num)]):
The target regression values with the same size of pred.
label_weight (float tensor of size [batch_num, 4 (* class_num)]):
The weight of each sample, 0 if ignored.
Returns:
The gradient harmonized loss.
"""
mu = self.mu
edges = self.edges
mmt = self.momentum
# ASL1 loss
diff = pred - target
loss = torch.sqrt(diff * diff + mu * mu) - mu
# gradient length
g = torch.abs(diff / torch.sqrt(mu * mu + diff * diff)).detach()
weights = torch.zeros_like(g)
valid = label_weight > 0
tot = max(label_weight.float().sum().item(), 1.0)
n = 0 # n: valid bins
for i in range(self.bins):
inds = (g >= edges[i]) & (g < edges[i + 1]) & valid
num_in_bin = inds.sum().item()
if num_in_bin > 0:
n += 1
if mmt > 0:
self.acc_sum[i] = mmt * self.acc_sum[i] \
+ (1 - mmt) * num_in_bin
weights[inds] = tot / self.acc_sum[i]
else:
weights[inds] = tot / num_in_bin
if n > 0:
weights /= n
loss = loss * weights
loss = loss.sum() / tot
return loss * self.loss_weight
import math
import torch
import torch.nn as nn
from mmdet.core import bbox_overlaps
from ..builder import LOSSES
from .utils import weighted_loss
@weighted_loss
def iou_loss(pred, target, linear=False, eps=1e-6):
"""IoU loss.
Computing the IoU loss between a set of predicted bboxes and target bboxes.
The loss is calculated as negative log of IoU.
Args:
pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),
shape (n, 4).
target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).
linear (bool, optional): If True, use linear scale of loss instead of
log scale. Default: False.
eps (float): Eps to avoid log(0).
Return:
torch.Tensor: Loss tensor.
"""
ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
if linear:
loss = 1 - ious
else:
loss = -ious.log()
return loss
@weighted_loss
def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3):
"""BIoULoss.
This is an implementation of paper
`Improving Object Localization with Fitness NMS and Bounded IoU Loss.
<https://arxiv.org/abs/1711.00164>`_.
Args:
pred (torch.Tensor): Predicted bboxes.
target (torch.Tensor): Target bboxes.
beta (float): beta parameter in smoothl1.
eps (float): eps to avoid NaN.
"""
pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5
pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5
pred_w = pred[:, 2] - pred[:, 0]
pred_h = pred[:, 3] - pred[:, 1]
with torch.no_grad():
target_ctrx = (target[:, 0] + target[:, 2]) * 0.5
target_ctry = (target[:, 1] + target[:, 3]) * 0.5
target_w = target[:, 2] - target[:, 0]
target_h = target[:, 3] - target[:, 1]
dx = target_ctrx - pred_ctrx
dy = target_ctry - pred_ctry
loss_dx = 1 - torch.max(
(target_w - 2 * dx.abs()) /
(target_w + 2 * dx.abs() + eps), torch.zeros_like(dx))
loss_dy = 1 - torch.max(
(target_h - 2 * dy.abs()) /
(target_h + 2 * dy.abs() + eps), torch.zeros_like(dy))
loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w /
(target_w + eps))
loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h /
(target_h + eps))
loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh],
dim=-1).view(loss_dx.size(0), -1)
loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta,
loss_comb - 0.5 * beta)
return loss
@weighted_loss
def giou_loss(pred, target, eps=1e-7):
r"""`Generalized Intersection over Union: A Metric and A Loss for Bounding
Box Regression <https://arxiv.org/abs/1902.09630>`_.
Args:
pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),
shape (n, 4).
target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).
eps (float): Eps to avoid log(0).
Return:
Tensor: Loss tensor.
"""
gious = bbox_overlaps(pred, target, mode='giou', is_aligned=True, eps=eps)
loss = 1 - gious
return loss
@weighted_loss
def diou_loss(pred, target, eps=1e-7):
r"""`Implementation of Distance-IoU Loss: Faster and Better
Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_.
Code is modified from https://github.com/Zzh-tju/DIoU.
Args:
pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
shape (n, 4).
target (Tensor): Corresponding gt bboxes, shape (n, 4).
eps (float): Eps to avoid log(0).
Return:
Tensor: Loss tensor.
"""
# overlap
lt = torch.max(pred[:, :2], target[:, :2])
rb = torch.min(pred[:, 2:], target[:, 2:])
wh = (rb - lt).clamp(min=0)
overlap = wh[:, 0] * wh[:, 1]
# union
ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
union = ap + ag - overlap + eps
# IoU
ious = overlap / union
# enclose area
enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])
enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])
enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)
cw = enclose_wh[:, 0]
ch = enclose_wh[:, 1]
c2 = cw**2 + ch**2 + eps
b1_x1, b1_y1 = pred[:, 0], pred[:, 1]
b1_x2, b1_y2 = pred[:, 2], pred[:, 3]
b2_x1, b2_y1 = target[:, 0], target[:, 1]
b2_x2, b2_y2 = target[:, 2], target[:, 3]
left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
rho2 = left + right
# DIoU
dious = ious - rho2 / c2
loss = 1 - dious
return loss
@weighted_loss
def ciou_loss(pred, target, eps=1e-7):
r"""`Implementation of paper `Enhancing Geometric Factors into
Model Learning and Inference for Object Detection and Instance
Segmentation <https://arxiv.org/abs/2005.03572>`_.
Code is modified from https://github.com/Zzh-tju/CIoU.
Args:
pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
shape (n, 4).
target (Tensor): Corresponding gt bboxes, shape (n, 4).
eps (float): Eps to avoid log(0).
Return:
Tensor: Loss tensor.
"""
# overlap
lt = torch.max(pred[:, :2], target[:, :2])
rb = torch.min(pred[:, 2:], target[:, 2:])
wh = (rb - lt).clamp(min=0)
overlap = wh[:, 0] * wh[:, 1]
# union
ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
union = ap + ag - overlap + eps
# IoU
ious = overlap / union
# enclose area
enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])
enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])
enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)
cw = enclose_wh[:, 0]
ch = enclose_wh[:, 1]
c2 = cw**2 + ch**2 + eps
b1_x1, b1_y1 = pred[:, 0], pred[:, 1]
b1_x2, b1_y2 = pred[:, 2], pred[:, 3]
b2_x1, b2_y1 = target[:, 0], target[:, 1]
b2_x2, b2_y2 = target[:, 2], target[:, 3]
w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
rho2 = left + right
factor = 4 / math.pi**2
v = factor * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
# CIoU
cious = ious - (rho2 / c2 + v**2 / (1 - ious + v))
loss = 1 - cious
return loss
@LOSSES.register_module()
class IoULoss(nn.Module):
"""IoULoss.
Computing the IoU loss between a set of predicted bboxes and target bboxes.
Args:
linear (bool): If True, use linear scale of loss instead of log scale.
Default: False.
eps (float): Eps to avoid log(0).
reduction (str): Options are "none", "mean" and "sum".
loss_weight (float): Weight of loss.
"""
def __init__(self,
linear=False,
eps=1e-6,
reduction='mean',
loss_weight=1.0):
super(IoULoss, self).__init__()
self.linear = linear
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None. Options are "none", "mean" and "sum".
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if (weight is not None) and (not torch.any(weight > 0)) and (
reduction != 'none'):
return (pred * weight).sum() # 0
if weight is not None and weight.dim() > 1:
# TODO: remove this in the future
# reduce the weight of shape (n, 4) to (n,) to match the
# iou_loss of shape (n,)
assert weight.shape == pred.shape
weight = weight.mean(-1)
loss = self.loss_weight * iou_loss(
pred,
target,
weight,
linear=self.linear,
eps=self.eps,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
@LOSSES.register_module()
class BoundedIoULoss(nn.Module):
def __init__(self, beta=0.2, eps=1e-3, reduction='mean', loss_weight=1.0):
super(BoundedIoULoss, self).__init__()
self.beta = beta
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
if weight is not None and not torch.any(weight > 0):
return (pred * weight).sum() # 0
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss = self.loss_weight * bounded_iou_loss(
pred,
target,
weight,
beta=self.beta,
eps=self.eps,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
@LOSSES.register_module()
class GIoULoss(nn.Module):
def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
super(GIoULoss, self).__init__()
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
if weight is not None and not torch.any(weight > 0):
return (pred * weight).sum() # 0
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
# TODO: remove this in the future
# reduce the weight of shape (n, 4) to (n,) to match the
# giou_loss of shape (n,)
assert weight.shape == pred.shape
weight = weight.mean(-1)
loss = self.loss_weight * giou_loss(
pred,
target,
weight,
eps=self.eps,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
@LOSSES.register_module()
class DIoULoss(nn.Module):
def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
super(DIoULoss, self).__init__()
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
if weight is not None and not torch.any(weight > 0):
return (pred * weight).sum() # 0
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
# TODO: remove this in the future
# reduce the weight of shape (n, 4) to (n,) to match the
# giou_loss of shape (n,)
assert weight.shape == pred.shape
weight = weight.mean(-1)
loss = self.loss_weight * diou_loss(
pred,
target,
weight,
eps=self.eps,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
@LOSSES.register_module()
class CIoULoss(nn.Module):
def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
super(CIoULoss, self).__init__()
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
if weight is not None and not torch.any(weight > 0):
return (pred * weight).sum() # 0
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
# TODO: remove this in the future
# reduce the weight of shape (n, 4) to (n,) to match the
# giou_loss of shape (n,)
assert weight.shape == pred.shape
weight = weight.mean(-1)
loss = self.loss_weight * ciou_loss(
pred,
target,
weight,
eps=self.eps,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weighted_loss
@weighted_loss
def mse_loss(pred, target):
"""Warpper of mse loss."""
return F.mse_loss(pred, target, reduction='none')
@LOSSES.register_module()
class MSELoss(nn.Module):
"""MSELoss.
Args:
reduction (str, optional): The method that reduces the loss to a
scalar. Options are "none", "mean" and "sum".
loss_weight (float, optional): The weight of the loss. Defaults to 1.0
"""
def __init__(self, reduction='mean', loss_weight=1.0):
super().__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self, pred, target, weight=None, avg_factor=None):
"""Forward function of loss.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
weight (torch.Tensor, optional): Weight of the loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
Returns:
torch.Tensor: The calculated loss
"""
loss = self.loss_weight * mse_loss(
pred,
target,
weight,
reduction=self.reduction,
avg_factor=avg_factor)
return loss
import torch
from mmdet.core import bbox_overlaps
def isr_p(cls_score,
bbox_pred,
bbox_targets,
rois,
sampling_results,
loss_cls,
bbox_coder,
k=2,
bias=0,
num_class=80):
"""Importance-based Sample Reweighting (ISR_P), positive part.
Args:
cls_score (Tensor): Predicted classification scores.
bbox_pred (Tensor): Predicted bbox deltas.
bbox_targets (tuple[Tensor]): A tuple of bbox targets, the are
labels, label_weights, bbox_targets, bbox_weights, respectively.
rois (Tensor): Anchors (single_stage) in shape (n, 4) or RoIs
(two_stage) in shape (n, 5).
sampling_results (obj): Sampling results.
loss_cls (func): Classification loss func of the head.
bbox_coder (obj): BBox coder of the head.
k (float): Power of the non-linear mapping.
bias (float): Shift of the non-linear mapping.
num_class (int): Number of classes, default: 80.
Return:
tuple([Tensor]): labels, imp_based_label_weights, bbox_targets,
bbox_target_weights
"""
labels, label_weights, bbox_targets, bbox_weights = bbox_targets
pos_label_inds = ((labels >= 0) &
(labels < num_class)).nonzero().reshape(-1)
pos_labels = labels[pos_label_inds]
# if no positive samples, return the original targets
num_pos = float(pos_label_inds.size(0))
if num_pos == 0:
return labels, label_weights, bbox_targets, bbox_weights
# merge pos_assigned_gt_inds of per image to a single tensor
gts = list()
last_max_gt = 0
for i in range(len(sampling_results)):
gt_i = sampling_results[i].pos_assigned_gt_inds
gts.append(gt_i + last_max_gt)
if len(gt_i) != 0:
last_max_gt = gt_i.max() + 1
gts = torch.cat(gts)
assert len(gts) == num_pos
cls_score = cls_score.detach()
bbox_pred = bbox_pred.detach()
# For single stage detectors, rois here indicate anchors, in shape (N, 4)
# For two stage detectors, rois are in shape (N, 5)
if rois.size(-1) == 5:
pos_rois = rois[pos_label_inds][:, 1:]
else:
pos_rois = rois[pos_label_inds]
if bbox_pred.size(-1) > 4:
bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
pos_delta_pred = bbox_pred[pos_label_inds, pos_labels].view(-1, 4)
else:
pos_delta_pred = bbox_pred[pos_label_inds].view(-1, 4)
# compute iou of the predicted bbox and the corresponding GT
pos_delta_target = bbox_targets[pos_label_inds].view(-1, 4)
pos_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_pred)
target_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_target)
ious = bbox_overlaps(pos_bbox_pred, target_bbox_pred, is_aligned=True)
pos_imp_weights = label_weights[pos_label_inds]
# Two steps to compute IoU-HLR. Samples are first sorted by IoU locally,
# then sorted again within the same-rank group
max_l_num = pos_labels.bincount().max()
for label in pos_labels.unique():
l_inds = (pos_labels == label).nonzero().view(-1)
l_gts = gts[l_inds]
for t in l_gts.unique():
t_inds = l_inds[l_gts == t]
t_ious = ious[t_inds]
_, t_iou_rank_idx = t_ious.sort(descending=True)
_, t_iou_rank = t_iou_rank_idx.sort()
ious[t_inds] += max_l_num - t_iou_rank.float()
l_ious = ious[l_inds]
_, l_iou_rank_idx = l_ious.sort(descending=True)
_, l_iou_rank = l_iou_rank_idx.sort() # IoU-HLR
# linearly map HLR to label weights
pos_imp_weights[l_inds] *= (max_l_num - l_iou_rank.float()) / max_l_num
pos_imp_weights = (bias + pos_imp_weights * (1 - bias)).pow(k)
# normalize to make the new weighted loss value equal to the original loss
pos_loss_cls = loss_cls(
cls_score[pos_label_inds], pos_labels, reduction_override='none')
if pos_loss_cls.dim() > 1:
ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds][:,
None]
new_pos_loss_cls = pos_loss_cls * pos_imp_weights[:, None]
else:
ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds]
new_pos_loss_cls = pos_loss_cls * pos_imp_weights
pos_loss_cls_ratio = ori_pos_loss_cls.sum() / new_pos_loss_cls.sum()
pos_imp_weights = pos_imp_weights * pos_loss_cls_ratio
label_weights[pos_label_inds] = pos_imp_weights
bbox_targets = labels, label_weights, bbox_targets, bbox_weights
return bbox_targets
def carl_loss(cls_score,
labels,
bbox_pred,
bbox_targets,
loss_bbox,
k=1,
bias=0.2,
avg_factor=None,
sigmoid=False,
num_class=80):
"""Classification-Aware Regression Loss (CARL).
Args:
cls_score (Tensor): Predicted classification scores.
labels (Tensor): Targets of classification.
bbox_pred (Tensor): Predicted bbox deltas.
bbox_targets (Tensor): Target of bbox regression.
loss_bbox (func): Regression loss func of the head.
bbox_coder (obj): BBox coder of the head.
k (float): Power of the non-linear mapping.
bias (float): Shift of the non-linear mapping.
avg_factor (int): Average factor used in regression loss.
sigmoid (bool): Activation of the classification score.
num_class (int): Number of classes, default: 80.
Return:
dict: CARL loss dict.
"""
pos_label_inds = ((labels >= 0) &
(labels < num_class)).nonzero().reshape(-1)
if pos_label_inds.numel() == 0:
return dict(loss_carl=cls_score.sum()[None] * 0.)
pos_labels = labels[pos_label_inds]
# multiply pos_cls_score with the corresponding bbox weight
# and remain gradient
if sigmoid:
pos_cls_score = cls_score.sigmoid()[pos_label_inds, pos_labels]
else:
pos_cls_score = cls_score.softmax(-1)[pos_label_inds, pos_labels]
carl_loss_weights = (bias + (1 - bias) * pos_cls_score).pow(k)
# normalize carl_loss_weight to make its sum equal to num positive
num_pos = float(pos_cls_score.size(0))
weight_ratio = num_pos / carl_loss_weights.sum()
carl_loss_weights *= weight_ratio
if avg_factor is None:
avg_factor = bbox_targets.size(0)
# if is class agnostic, bbox pred is in shape (N, 4)
# otherwise, bbox pred is in shape (N, #classes, 4)
if bbox_pred.size(-1) > 4:
bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
pos_bbox_preds = bbox_pred[pos_label_inds, pos_labels]
else:
pos_bbox_preds = bbox_pred[pos_label_inds]
ori_loss_reg = loss_bbox(
pos_bbox_preds,
bbox_targets[pos_label_inds],
reduction_override='none') / avg_factor
loss_carl = (ori_loss_reg * carl_loss_weights[:, None]).sum()
return dict(loss_carl=loss_carl[None])
import torch
import torch.nn as nn
from ..builder import LOSSES
from .utils import weighted_loss
@weighted_loss
def smooth_l1_loss(pred, target, beta=1.0):
"""Smooth L1 loss.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
Returns:
torch.Tensor: Calculated loss
"""
assert beta > 0
assert pred.size() == target.size() and target.numel() > 0
diff = torch.abs(pred - target)
loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
diff - 0.5 * beta)
return loss
@weighted_loss
def l1_loss(pred, target):
"""L1 loss.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
Returns:
torch.Tensor: Calculated loss
"""
assert pred.size() == target.size() and target.numel() > 0
loss = torch.abs(pred - target)
return loss
@LOSSES.register_module()
class SmoothL1Loss(nn.Module):
"""Smooth L1 loss.
Args:
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
reduction (str, optional): The method to reduce the loss.
Options are "none", "mean" and "sum". Defaults to "mean".
loss_weight (float, optional): The weight of loss.
"""
def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0):
super(SmoothL1Loss, self).__init__()
self.beta = beta
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * smooth_l1_loss(
pred,
target,
weight,
beta=self.beta,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss_bbox
@LOSSES.register_module()
class L1Loss(nn.Module):
"""L1 loss.
Args:
reduction (str, optional): The method to reduce the loss.
Options are "none", "mean" and "sum".
loss_weight (float, optional): The weight of loss.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
super(L1Loss, self).__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * l1_loss(
pred, target, weight, reduction=reduction, avg_factor=avg_factor)
return loss_bbox
import functools
import torch.nn.functional as F
def reduce_loss(loss, reduction):
"""Reduce loss as specified.
Args:
loss (Tensor): Elementwise loss tensor.
reduction (str): Options are "none", "mean" and "sum".
Return:
Tensor: Reduced loss tensor.
"""
reduction_enum = F._Reduction.get_enum(reduction)
# none: 0, elementwise_mean:1, sum: 2
if reduction_enum == 0:
return loss
elif reduction_enum == 1:
return loss.mean()
elif reduction_enum == 2:
return loss.sum()
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
"""Apply element-wise weight and reduce loss.
Args:
loss (Tensor): Element-wise loss.
weight (Tensor): Element-wise weights.
reduction (str): Same as built-in losses of PyTorch.
avg_factor (float): Avarage factor when computing the mean of losses.
Returns:
Tensor: Processed loss values.
"""
# if weight is specified, apply element-wise weight
if weight is not None:
loss = loss * weight
# if avg_factor is not specified, just reduce the loss
if avg_factor is None:
loss = reduce_loss(loss, reduction)
else:
# if reduction is mean, then average the loss by avg_factor
if reduction == 'mean':
loss = loss.sum() / avg_factor
# if reduction is 'none', then do nothing, otherwise raise an error
elif reduction != 'none':
raise ValueError('avg_factor can not be used with reduction="sum"')
return loss
def weighted_loss(loss_func):
"""Create a weighted version of a given loss function.
To use this decorator, the loss function must have the signature like
`loss_func(pred, target, **kwargs)`. The function only needs to compute
element-wise loss without any reduction. This decorator will add weight
and reduction arguments to the function. The decorated function will have
the signature like `loss_func(pred, target, weight=None, reduction='mean',
avg_factor=None, **kwargs)`.
:Example:
>>> import torch
>>> @weighted_loss
>>> def l1_loss(pred, target):
>>> return (pred - target).abs()
>>> pred = torch.Tensor([0, 2, 3])
>>> target = torch.Tensor([1, 1, 1])
>>> weight = torch.Tensor([1, 0, 1])
>>> l1_loss(pred, target)
tensor(1.3333)
>>> l1_loss(pred, target, weight)
tensor(1.)
>>> l1_loss(pred, target, reduction='none')
tensor([1., 1., 2.])
>>> l1_loss(pred, target, weight, avg_factor=2)
tensor(1.5000)
"""
@functools.wraps(loss_func)
def wrapper(pred,
target,
weight=None,
reduction='mean',
avg_factor=None,
**kwargs):
# get element-wise loss
loss = loss_func(pred, target, **kwargs)
loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
return loss
return wrapper
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import weight_reduce_loss
def varifocal_loss(pred,
target,
weight=None,
alpha=0.75,
gamma=2.0,
iou_weighted=True,
reduction='mean',
avg_factor=None):
"""`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_
Args:
pred (torch.Tensor): The prediction with shape (N, C), C is the
number of classes
target (torch.Tensor): The learning target of the iou-aware
classification score with shape (N, C), C is the number of classes.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
alpha (float, optional): A balance factor for the negative part of
Varifocal Loss, which is different from the alpha of Focal Loss.
Defaults to 0.75.
gamma (float, optional): The gamma for calculating the modulating
factor. Defaults to 2.0.
iou_weighted (bool, optional): Whether to weight the loss of the
positive example with the iou target. Defaults to True.
reduction (str, optional): The method used to reduce the loss into
a scalar. Defaults to 'mean'. Options are "none", "mean" and
"sum".
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
"""
# pred and target should be of the same size
assert pred.size() == target.size()
pred_sigmoid = pred.sigmoid()
target = target.type_as(pred)
if iou_weighted:
focal_weight = target * (target > 0.0).float() + \
alpha * (pred_sigmoid - target).abs().pow(gamma) * \
(target <= 0.0).float()
else:
focal_weight = (target > 0.0).float() + \
alpha * (pred_sigmoid - target).abs().pow(gamma) * \
(target <= 0.0).float()
loss = F.binary_cross_entropy_with_logits(
pred, target, reduction='none') * focal_weight
loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
return loss
@LOSSES.register_module()
class VarifocalLoss(nn.Module):
def __init__(self,
use_sigmoid=True,
alpha=0.75,
gamma=2.0,
iou_weighted=True,
reduction='mean',
loss_weight=1.0):
"""`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_
Args:
use_sigmoid (bool, optional): Whether the prediction is
used for sigmoid or softmax. Defaults to True.
alpha (float, optional): A balance factor for the negative part of
Varifocal Loss, which is different from the alpha of Focal
Loss. Defaults to 0.75.
gamma (float, optional): The gamma for calculating the modulating
factor. Defaults to 2.0.
iou_weighted (bool, optional): Whether to weight the loss of the
positive examples with the iou target. Defaults to True.
reduction (str, optional): The method used to reduce the loss into
a scalar. Defaults to 'mean'. Options are "none", "mean" and
"sum".
loss_weight (float, optional): Weight of loss. Defaults to 1.0.
"""
super(VarifocalLoss, self).__init__()
assert use_sigmoid is True, \
'Only sigmoid varifocal loss supported now.'
assert alpha >= 0.0
self.use_sigmoid = use_sigmoid
self.alpha = alpha
self.gamma = gamma
self.iou_weighted = iou_weighted
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Options are "none", "mean" and "sum".
Returns:
torch.Tensor: The calculated loss
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if self.use_sigmoid:
loss_cls = self.loss_weight * varifocal_loss(
pred,
target,
weight,
alpha=self.alpha,
gamma=self.gamma,
iou_weighted=self.iou_weighted,
reduction=reduction,
avg_factor=avg_factor)
else:
raise NotImplementedError
return loss_cls
from .bfp import BFP
from .channel_mapper import ChannelMapper
from .fpn import FPN
from .fpn_carafe import FPN_CARAFE
from .hrfpn import HRFPN
from .nas_fpn import NASFPN
from .nasfcos_fpn import NASFCOS_FPN
from .pafpn import PAFPN
from .rfp import RFP
from .yolo_neck import YOLOV3Neck
__all__ = [
'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN',
'NASFCOS_FPN', 'RFP', 'YOLOV3Neck'
]
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule, xavier_init
from mmcv.cnn.bricks import NonLocal2d
from ..builder import NECKS
@NECKS.register_module()
class BFP(nn.Module):
"""BFP (Balanced Feature Pyrmamids)
BFP takes multi-level features as inputs and gather them into a single one,
then refine the gathered feature and scatter the refined results to
multi-level features. This module is used in Libra R-CNN (CVPR 2019), see
the paper `Libra R-CNN: Towards Balanced Learning for Object Detection
<https://arxiv.org/abs/1904.02701>`_ for details.
Args:
in_channels (int): Number of input channels (feature maps of all levels
should have the same channels).
num_levels (int): Number of input feature levels.
conv_cfg (dict): The config dict for convolution layers.
norm_cfg (dict): The config dict for normalization layers.
refine_level (int): Index of integration and refine level of BSF in
multi-level features from bottom to top.
refine_type (str): Type of the refine op, currently support
[None, 'conv', 'non_local'].
"""
def __init__(self,
in_channels,
num_levels,
refine_level=2,
refine_type=None,
conv_cfg=None,
norm_cfg=None):
super(BFP, self).__init__()
assert refine_type in [None, 'conv', 'non_local']
self.in_channels = in_channels
self.num_levels = num_levels
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.refine_level = refine_level
self.refine_type = refine_type
assert 0 <= self.refine_level < self.num_levels
if self.refine_type == 'conv':
self.refine = ConvModule(
self.in_channels,
self.in_channels,
3,
padding=1,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg)
elif self.refine_type == 'non_local':
self.refine = NonLocal2d(
self.in_channels,
reduction=1,
use_scale=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg)
def init_weights(self):
"""Initialize the weights of FPN module."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform')
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == self.num_levels
# step 1: gather multi-level features by resize and average
feats = []
gather_size = inputs[self.refine_level].size()[2:]
for i in range(self.num_levels):
if i < self.refine_level:
gathered = F.adaptive_max_pool2d(
inputs[i], output_size=gather_size)
else:
gathered = F.interpolate(
inputs[i], size=gather_size, mode='nearest')
feats.append(gathered)
bsf = sum(feats) / len(feats)
# step 2: refine gathered features
if self.refine_type is not None:
bsf = self.refine(bsf)
# step 3: scatter refined features to multi-levels by a residual path
outs = []
for i in range(self.num_levels):
out_size = inputs[i].size()[2:]
if i < self.refine_level:
residual = F.interpolate(bsf, size=out_size, mode='nearest')
else:
residual = F.adaptive_max_pool2d(bsf, output_size=out_size)
outs.append(residual + inputs[i])
return tuple(outs)
import torch.nn as nn
from mmcv.cnn import ConvModule, xavier_init
from ..builder import NECKS
@NECKS.register_module()
class ChannelMapper(nn.Module):
r"""Channel Mapper to reduce/increase channels of backbone features.
This is used to reduce/increase channels of backbone features.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale).
kernel_size (int, optional): kernel_size for reducing channels (used
at each scale). Default: 3.
conv_cfg (dict, optional): Config dict for convolution layer.
Default: None.
norm_cfg (dict, optional): Config dict for normalization layer.
Default: None.
act_cfg (dict, optional): Config dict for activation layer in
ConvModule. Default: dict(type='ReLU').
Example:
>>> import torch
>>> in_channels = [2, 3, 5, 7]
>>> scales = [340, 170, 84, 43]
>>> inputs = [torch.rand(1, c, s, s)
... for c, s in zip(in_channels, scales)]
>>> self = ChannelMapper(in_channels, 11, 3).eval()
>>> outputs = self.forward(inputs)
>>> for i in range(len(outputs)):
... print(f'outputs[{i}].shape = {outputs[i].shape}')
outputs[0].shape = torch.Size([1, 11, 340, 340])
outputs[1].shape = torch.Size([1, 11, 170, 170])
outputs[2].shape = torch.Size([1, 11, 84, 84])
outputs[3].shape = torch.Size([1, 11, 43, 43])
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
conv_cfg=None,
norm_cfg=None,
act_cfg=dict(type='ReLU')):
super(ChannelMapper, self).__init__()
assert isinstance(in_channels, list)
self.convs = nn.ModuleList()
for in_channel in in_channels:
self.convs.append(
ConvModule(
in_channel,
out_channels,
kernel_size,
padding=(kernel_size - 1) // 2,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg))
# default init_weights for conv(msra) and norm in ConvModule
def init_weights(self):
"""Initialize the weights of ChannelMapper module."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform')
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == len(self.convs)
outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
return tuple(outs)
import warnings
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule, xavier_init
from mmcv.runner import auto_fp16
from ..builder import NECKS
@NECKS.register_module()
class FPN(nn.Module):
r"""Feature Pyramid Network.
This is an implementation of paper `Feature Pyramid Networks for Object
Detection <https://arxiv.org/abs/1612.03144>`_.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
num_outs (int): Number of output scales.
start_level (int): Index of the start input backbone level used to
build the feature pyramid. Default: 0.
end_level (int): Index of the end input backbone level (exclusive) to
build the feature pyramid. Default: -1, which means the last level.
add_extra_convs (bool | str): If bool, it decides whether to add conv
layers on top of the original feature maps. Default to False.
If True, its actual mode is specified by `extra_convs_on_inputs`.
If str, it specifies the source feature map of the extra convs.
Only the following options are allowed
- 'on_input': Last feat map of neck inputs (i.e. backbone feature).
- 'on_lateral': Last feature map after lateral convs.
- 'on_output': The last output feature map after fpn convs.
extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs
on the original feature from the backbone. If True,
it is equivalent to `add_extra_convs='on_input'`. If False, it is
equivalent to set `add_extra_convs='on_output'`. Default to True.
relu_before_extra_convs (bool): Whether to apply relu before the extra
conv. Default: False.
no_norm_on_lateral (bool): Whether to apply norm on lateral.
Default: False.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Config dict for normalization layer. Default: None.
act_cfg (str): Config dict for activation layer in ConvModule.
Default: None.
upsample_cfg (dict): Config dict for interpolate layer.
Default: `dict(mode='nearest')`
Example:
>>> import torch
>>> in_channels = [2, 3, 5, 7]
>>> scales = [340, 170, 84, 43]
>>> inputs = [torch.rand(1, c, s, s)
... for c, s in zip(in_channels, scales)]
>>> self = FPN(in_channels, 11, len(in_channels)).eval()
>>> outputs = self.forward(inputs)
>>> for i in range(len(outputs)):
... print(f'outputs[{i}].shape = {outputs[i].shape}')
outputs[0].shape = torch.Size([1, 11, 340, 340])
outputs[1].shape = torch.Size([1, 11, 170, 170])
outputs[2].shape = torch.Size([1, 11, 84, 84])
outputs[3].shape = torch.Size([1, 11, 43, 43])
"""
def __init__(self,
in_channels,
out_channels,
num_outs,
start_level=0,
end_level=-1,
add_extra_convs=False,
extra_convs_on_inputs=True,
relu_before_extra_convs=False,
no_norm_on_lateral=False,
conv_cfg=None,
norm_cfg=None,
act_cfg=None,
upsample_cfg=dict(mode='nearest')):
super(FPN, self).__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
self.num_ins = len(in_channels)
self.num_outs = num_outs
self.relu_before_extra_convs = relu_before_extra_convs
self.no_norm_on_lateral = no_norm_on_lateral
self.fp16_enabled = False
self.upsample_cfg = upsample_cfg.copy()
if end_level == -1:
self.backbone_end_level = self.num_ins
assert num_outs >= self.num_ins - start_level
else:
# if end_level < inputs, no extra level is allowed
self.backbone_end_level = end_level
assert end_level <= len(in_channels)
assert num_outs == end_level - start_level
self.start_level = start_level
self.end_level = end_level
self.add_extra_convs = add_extra_convs
assert isinstance(add_extra_convs, (str, bool))
if isinstance(add_extra_convs, str):
# Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
elif add_extra_convs: # True
if extra_convs_on_inputs:
# TODO: deprecate `extra_convs_on_inputs`
warnings.simplefilter('once')
warnings.warn(
'"extra_convs_on_inputs" will be deprecated in v2.9.0,'
'Please use "add_extra_convs"', DeprecationWarning)
self.add_extra_convs = 'on_input'
else:
self.add_extra_convs = 'on_output'
self.lateral_convs = nn.ModuleList()
self.fpn_convs = nn.ModuleList()
for i in range(self.start_level, self.backbone_end_level):
l_conv = ConvModule(
in_channels[i],
out_channels,
1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
act_cfg=act_cfg,
inplace=False)
fpn_conv = ConvModule(
out_channels,
out_channels,
3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
inplace=False)
self.lateral_convs.append(l_conv)
self.fpn_convs.append(fpn_conv)
# add extra conv layers (e.g., RetinaNet)
extra_levels = num_outs - self.backbone_end_level + self.start_level
if self.add_extra_convs and extra_levels >= 1:
for i in range(extra_levels):
if i == 0 and self.add_extra_convs == 'on_input':
in_channels = self.in_channels[self.backbone_end_level - 1]
else:
in_channels = out_channels
extra_fpn_conv = ConvModule(
in_channels,
out_channels,
3,
stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
inplace=False)
self.fpn_convs.append(extra_fpn_conv)
# default init_weights for conv(msra) and norm in ConvModule
def init_weights(self):
"""Initialize the weights of FPN module."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform')
@auto_fp16()
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == len(self.in_channels)
# build laterals
laterals = [
lateral_conv(inputs[i + self.start_level])
for i, lateral_conv in enumerate(self.lateral_convs)
]
# build top-down path
used_backbone_levels = len(laterals)
for i in range(used_backbone_levels - 1, 0, -1):
# In some cases, fixing `scale factor` (e.g. 2) is preferred, but
# it cannot co-exist with `size` in `F.interpolate`.
if 'scale_factor' in self.upsample_cfg:
laterals[i - 1] += F.interpolate(laterals[i],
**self.upsample_cfg)
else:
prev_shape = laterals[i - 1].shape[2:]
laterals[i - 1] += F.interpolate(
laterals[i], size=prev_shape, **self.upsample_cfg)
# build outputs
# part 1: from original levels
outs = [
self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
]
# part 2: add extra levels
if self.num_outs > len(outs):
# use max pool to get more levels on top of outputs
# (e.g., Faster R-CNN, Mask R-CNN)
if not self.add_extra_convs:
for i in range(self.num_outs - used_backbone_levels):
outs.append(F.max_pool2d(outs[-1], 1, stride=2))
# add conv layers on top of original feature maps (RetinaNet)
else:
if self.add_extra_convs == 'on_input':
extra_source = inputs[self.backbone_end_level - 1]
elif self.add_extra_convs == 'on_lateral':
extra_source = laterals[-1]
elif self.add_extra_convs == 'on_output':
extra_source = outs[-1]
else:
raise NotImplementedError
outs.append(self.fpn_convs[used_backbone_levels](extra_source))
for i in range(used_backbone_levels + 1, self.num_outs):
if self.relu_before_extra_convs:
outs.append(self.fpn_convs[i](F.relu(outs[-1])))
else:
outs.append(self.fpn_convs[i](outs[-1]))
return tuple(outs)
import torch.nn as nn
from mmcv.cnn import ConvModule, build_upsample_layer, xavier_init
from mmcv.ops.carafe import CARAFEPack
from ..builder import NECKS
@NECKS.register_module()
class FPN_CARAFE(nn.Module):
"""FPN_CARAFE is a more flexible implementation of FPN. It allows more
choice for upsample methods during the top-down pathway.
It can reproduce the preformance of ICCV 2019 paper
CARAFE: Content-Aware ReAssembly of FEatures
Please refer to https://arxiv.org/abs/1905.02188 for more details.
Args:
in_channels (list[int]): Number of channels for each input feature map.
out_channels (int): Output channels of feature pyramids.
num_outs (int): Number of output stages.
start_level (int): Start level of feature pyramids.
(Default: 0)
end_level (int): End level of feature pyramids.
(Default: -1 indicates the last level).
norm_cfg (dict): Dictionary to construct and config norm layer.
activate (str): Type of activation function in ConvModule
(Default: None indicates w/o activation).
order (dict): Order of components in ConvModule.
upsample (str): Type of upsample layer.
upsample_cfg (dict): Dictionary to construct and config upsample layer.
"""
def __init__(self,
in_channels,
out_channels,
num_outs,
start_level=0,
end_level=-1,
norm_cfg=None,
act_cfg=None,
order=('conv', 'norm', 'act'),
upsample_cfg=dict(
type='carafe',
up_kernel=5,
up_group=1,
encoder_kernel=3,
encoder_dilation=1)):
super(FPN_CARAFE, self).__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
self.num_ins = len(in_channels)
self.num_outs = num_outs
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.with_bias = norm_cfg is None
self.upsample_cfg = upsample_cfg.copy()
self.upsample = self.upsample_cfg.get('type')
self.relu = nn.ReLU(inplace=False)
self.order = order
assert order in [('conv', 'norm', 'act'), ('act', 'conv', 'norm')]
assert self.upsample in [
'nearest', 'bilinear', 'deconv', 'pixel_shuffle', 'carafe', None
]
if self.upsample in ['deconv', 'pixel_shuffle']:
assert hasattr(
self.upsample_cfg,
'upsample_kernel') and self.upsample_cfg.upsample_kernel > 0
self.upsample_kernel = self.upsample_cfg.pop('upsample_kernel')
if end_level == -1:
self.backbone_end_level = self.num_ins
assert num_outs >= self.num_ins - start_level
else:
# if end_level < inputs, no extra level is allowed
self.backbone_end_level = end_level
assert end_level <= len(in_channels)
assert num_outs == end_level - start_level
self.start_level = start_level
self.end_level = end_level
self.lateral_convs = nn.ModuleList()
self.fpn_convs = nn.ModuleList()
self.upsample_modules = nn.ModuleList()
for i in range(self.start_level, self.backbone_end_level):
l_conv = ConvModule(
in_channels[i],
out_channels,
1,
norm_cfg=norm_cfg,
bias=self.with_bias,
act_cfg=act_cfg,
inplace=False,
order=self.order)
fpn_conv = ConvModule(
out_channels,
out_channels,
3,
padding=1,
norm_cfg=self.norm_cfg,
bias=self.with_bias,
act_cfg=act_cfg,
inplace=False,
order=self.order)
if i != self.backbone_end_level - 1:
upsample_cfg_ = self.upsample_cfg.copy()
if self.upsample == 'deconv':
upsample_cfg_.update(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=self.upsample_kernel,
stride=2,
padding=(self.upsample_kernel - 1) // 2,
output_padding=(self.upsample_kernel - 1) // 2)
elif self.upsample == 'pixel_shuffle':
upsample_cfg_.update(
in_channels=out_channels,
out_channels=out_channels,
scale_factor=2,
upsample_kernel=self.upsample_kernel)
elif self.upsample == 'carafe':
upsample_cfg_.update(channels=out_channels, scale_factor=2)
else:
# suppress warnings
align_corners = (None
if self.upsample == 'nearest' else False)
upsample_cfg_.update(
scale_factor=2,
mode=self.upsample,
align_corners=align_corners)
upsample_module = build_upsample_layer(upsample_cfg_)
self.upsample_modules.append(upsample_module)
self.lateral_convs.append(l_conv)
self.fpn_convs.append(fpn_conv)
# add extra conv layers (e.g., RetinaNet)
extra_out_levels = (
num_outs - self.backbone_end_level + self.start_level)
if extra_out_levels >= 1:
for i in range(extra_out_levels):
in_channels = (
self.in_channels[self.backbone_end_level -
1] if i == 0 else out_channels)
extra_l_conv = ConvModule(
in_channels,
out_channels,
3,
stride=2,
padding=1,
norm_cfg=norm_cfg,
bias=self.with_bias,
act_cfg=act_cfg,
inplace=False,
order=self.order)
if self.upsample == 'deconv':
upsampler_cfg_ = dict(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=self.upsample_kernel,
stride=2,
padding=(self.upsample_kernel - 1) // 2,
output_padding=(self.upsample_kernel - 1) // 2)
elif self.upsample == 'pixel_shuffle':
upsampler_cfg_ = dict(
in_channels=out_channels,
out_channels=out_channels,
scale_factor=2,
upsample_kernel=self.upsample_kernel)
elif self.upsample == 'carafe':
upsampler_cfg_ = dict(
channels=out_channels,
scale_factor=2,
**self.upsample_cfg)
else:
# suppress warnings
align_corners = (None
if self.upsample == 'nearest' else False)
upsampler_cfg_ = dict(
scale_factor=2,
mode=self.upsample,
align_corners=align_corners)
upsampler_cfg_['type'] = self.upsample
upsample_module = build_upsample_layer(upsampler_cfg_)
extra_fpn_conv = ConvModule(
out_channels,
out_channels,
3,
padding=1,
norm_cfg=self.norm_cfg,
bias=self.with_bias,
act_cfg=act_cfg,
inplace=False,
order=self.order)
self.upsample_modules.append(upsample_module)
self.fpn_convs.append(extra_fpn_conv)
self.lateral_convs.append(extra_l_conv)
# default init_weights for conv(msra) and norm in ConvModule
def init_weights(self):
"""Initialize the weights of module."""
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
xavier_init(m, distribution='uniform')
for m in self.modules():
if isinstance(m, CARAFEPack):
m.init_weights()
def slice_as(self, src, dst):
"""Slice ``src`` as ``dst``
Note:
``src`` should have the same or larger size than ``dst``.
Args:
src (torch.Tensor): Tensors to be sliced.
dst (torch.Tensor): ``src`` will be sliced to have the same
size as ``dst``.
Returns:
torch.Tensor: Sliced tensor.
"""
assert (src.size(2) >= dst.size(2)) and (src.size(3) >= dst.size(3))
if src.size(2) == dst.size(2) and src.size(3) == dst.size(3):
return src
else:
return src[:, :, :dst.size(2), :dst.size(3)]
def tensor_add(self, a, b):
"""Add tensors ``a`` and ``b`` that might have different sizes."""
if a.size() == b.size():
c = a + b
else:
c = a + self.slice_as(b, a)
return c
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == len(self.in_channels)
# build laterals
laterals = []
for i, lateral_conv in enumerate(self.lateral_convs):
if i <= self.backbone_end_level - self.start_level:
input = inputs[min(i + self.start_level, len(inputs) - 1)]
else:
input = laterals[-1]
lateral = lateral_conv(input)
laterals.append(lateral)
# build top-down path
for i in range(len(laterals) - 1, 0, -1):
if self.upsample is not None:
upsample_feat = self.upsample_modules[i - 1](laterals[i])
else:
upsample_feat = laterals[i]
laterals[i - 1] = self.tensor_add(laterals[i - 1], upsample_feat)
# build outputs
num_conv_outs = len(self.fpn_convs)
outs = []
for i in range(num_conv_outs):
out = self.fpn_convs[i](laterals[i])
outs.append(out)
return tuple(outs)
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule, caffe2_xavier_init
from torch.utils.checkpoint import checkpoint
from ..builder import NECKS
@NECKS.register_module()
class HRFPN(nn.Module):
"""HRFPN (High Resolution Feature Pyrmamids)
paper: `High-Resolution Representations for Labeling Pixels and Regions
<https://arxiv.org/abs/1904.04514>`_.
Args:
in_channels (list): number of channels for each branch.
out_channels (int): output channels of feature pyramids.
num_outs (int): number of output stages.
pooling_type (str): pooling for generating feature pyramids
from {MAX, AVG}.
conv_cfg (dict): dictionary to construct and config conv layer.
norm_cfg (dict): dictionary to construct and config norm layer.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed.
stride (int): stride of 3x3 convolutional layers
"""
def __init__(self,
in_channels,
out_channels,
num_outs=5,
pooling_type='AVG',
conv_cfg=None,
norm_cfg=None,
with_cp=False,
stride=1):
super(HRFPN, self).__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
self.num_ins = len(in_channels)
self.num_outs = num_outs
self.with_cp = with_cp
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.reduction_conv = ConvModule(
sum(in_channels),
out_channels,
kernel_size=1,
conv_cfg=self.conv_cfg,
act_cfg=None)
self.fpn_convs = nn.ModuleList()
for i in range(self.num_outs):
self.fpn_convs.append(
ConvModule(
out_channels,
out_channels,
kernel_size=3,
padding=1,
stride=stride,
conv_cfg=self.conv_cfg,
act_cfg=None))
if pooling_type == 'MAX':
self.pooling = F.max_pool2d
else:
self.pooling = F.avg_pool2d
def init_weights(self):
"""Initialize the weights of module."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
caffe2_xavier_init(m)
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == self.num_ins
outs = [inputs[0]]
for i in range(1, self.num_ins):
outs.append(
F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))
out = torch.cat(outs, dim=1)
if out.requires_grad and self.with_cp:
out = checkpoint(self.reduction_conv, out)
else:
out = self.reduction_conv(out)
outs = [out]
for i in range(1, self.num_outs):
outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))
outputs = []
for i in range(self.num_outs):
if outs[i].requires_grad and self.with_cp:
tmp_out = checkpoint(self.fpn_convs[i], outs[i])
else:
tmp_out = self.fpn_convs[i](outs[i])
outputs.append(tmp_out)
return tuple(outputs)
import torch.nn as nn
from mmcv.cnn import ConvModule, caffe2_xavier_init
from mmcv.ops.merge_cells import GlobalPoolingCell, SumCell
from ..builder import NECKS
@NECKS.register_module()
class NASFPN(nn.Module):
"""NAS-FPN.
Implementation of `NAS-FPN: Learning Scalable Feature Pyramid Architecture
for Object Detection <https://arxiv.org/abs/1904.07392>`_
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
num_outs (int): Number of output scales.
stack_times (int): The number of times the pyramid architecture will
be stacked.
start_level (int): Index of the start input backbone level used to
build the feature pyramid. Default: 0.
end_level (int): Index of the end input backbone level (exclusive) to
build the feature pyramid. Default: -1, which means the last level.
add_extra_convs (bool): It decides whether to add conv
layers on top of the original feature maps. Default to False.
If True, its actual mode is specified by `extra_convs_on_inputs`.
"""
def __init__(self,
in_channels,
out_channels,
num_outs,
stack_times,
start_level=0,
end_level=-1,
add_extra_convs=False,
norm_cfg=None):
super(NASFPN, self).__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
self.num_ins = len(in_channels) # num of input feature levels
self.num_outs = num_outs # num of output feature levels
self.stack_times = stack_times
self.norm_cfg = norm_cfg
if end_level == -1:
self.backbone_end_level = self.num_ins
assert num_outs >= self.num_ins - start_level
else:
# if end_level < inputs, no extra level is allowed
self.backbone_end_level = end_level
assert end_level <= len(in_channels)
assert num_outs == end_level - start_level
self.start_level = start_level
self.end_level = end_level
self.add_extra_convs = add_extra_convs
# add lateral connections
self.lateral_convs = nn.ModuleList()
for i in range(self.start_level, self.backbone_end_level):
l_conv = ConvModule(
in_channels[i],
out_channels,
1,
norm_cfg=norm_cfg,
act_cfg=None)
self.lateral_convs.append(l_conv)
# add extra downsample layers (stride-2 pooling or conv)
extra_levels = num_outs - self.backbone_end_level + self.start_level
self.extra_downsamples = nn.ModuleList()
for i in range(extra_levels):
extra_conv = ConvModule(
out_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None)
self.extra_downsamples.append(
nn.Sequential(extra_conv, nn.MaxPool2d(2, 2)))
# add NAS FPN connections
self.fpn_stages = nn.ModuleList()
for _ in range(self.stack_times):
stage = nn.ModuleDict()
# gp(p6, p4) -> p4_1
stage['gp_64_4'] = GlobalPoolingCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
# sum(p4_1, p4) -> p4_2
stage['sum_44_4'] = SumCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
# sum(p4_2, p3) -> p3_out
stage['sum_43_3'] = SumCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
# sum(p3_out, p4_2) -> p4_out
stage['sum_34_4'] = SumCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
# sum(p5, gp(p4_out, p3_out)) -> p5_out
stage['gp_43_5'] = GlobalPoolingCell(with_out_conv=False)
stage['sum_55_5'] = SumCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
# sum(p7, gp(p5_out, p4_2)) -> p7_out
stage['gp_54_7'] = GlobalPoolingCell(with_out_conv=False)
stage['sum_77_7'] = SumCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
# gp(p7_out, p5_out) -> p6_out
stage['gp_75_6'] = GlobalPoolingCell(
in_channels=out_channels,
out_channels=out_channels,
out_norm_cfg=norm_cfg)
self.fpn_stages.append(stage)
def init_weights(self):
"""Initialize the weights of module."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
caffe2_xavier_init(m)
def forward(self, inputs):
"""Forward function."""
# build P3-P5
feats = [
lateral_conv(inputs[i + self.start_level])
for i, lateral_conv in enumerate(self.lateral_convs)
]
# build P6-P7 on top of P5
for downsample in self.extra_downsamples:
feats.append(downsample(feats[-1]))
p3, p4, p5, p6, p7 = feats
for stage in self.fpn_stages:
# gp(p6, p4) -> p4_1
p4_1 = stage['gp_64_4'](p6, p4, out_size=p4.shape[-2:])
# sum(p4_1, p4) -> p4_2
p4_2 = stage['sum_44_4'](p4_1, p4, out_size=p4.shape[-2:])
# sum(p4_2, p3) -> p3_out
p3 = stage['sum_43_3'](p4_2, p3, out_size=p3.shape[-2:])
# sum(p3_out, p4_2) -> p4_out
p4 = stage['sum_34_4'](p3, p4_2, out_size=p4.shape[-2:])
# sum(p5, gp(p4_out, p3_out)) -> p5_out
p5_tmp = stage['gp_43_5'](p4, p3, out_size=p5.shape[-2:])
p5 = stage['sum_55_5'](p5, p5_tmp, out_size=p5.shape[-2:])
# sum(p7, gp(p5_out, p4_2)) -> p7_out
p7_tmp = stage['gp_54_7'](p5, p4_2, out_size=p7.shape[-2:])
p7 = stage['sum_77_7'](p7, p7_tmp, out_size=p7.shape[-2:])
# gp(p7_out, p5_out) -> p6_out
p6 = stage['gp_75_6'](p7, p5, out_size=p6.shape[-2:])
return p3, p4, p5, p6, p7
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule, caffe2_xavier_init
from mmcv.ops.merge_cells import ConcatCell
from ..builder import NECKS
@NECKS.register_module()
class NASFCOS_FPN(nn.Module):
"""FPN structure in NASFPN.
Implementation of paper `NAS-FCOS: Fast Neural Architecture Search for
Object Detection <https://arxiv.org/abs/1906.04423>`_
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
num_outs (int): Number of output scales.
start_level (int): Index of the start input backbone level used to
build the feature pyramid. Default: 0.
end_level (int): Index of the end input backbone level (exclusive) to
build the feature pyramid. Default: -1, which means the last level.
add_extra_convs (bool): It decides whether to add conv
layers on top of the original feature maps. Default to False.
If True, its actual mode is specified by `extra_convs_on_inputs`.
conv_cfg (dict): dictionary to construct and config conv layer.
norm_cfg (dict): dictionary to construct and config norm layer.
"""
def __init__(self,
in_channels,
out_channels,
num_outs,
start_level=1,
end_level=-1,
add_extra_convs=False,
conv_cfg=None,
norm_cfg=None):
super(NASFCOS_FPN, self).__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
self.num_ins = len(in_channels)
self.num_outs = num_outs
self.norm_cfg = norm_cfg
self.conv_cfg = conv_cfg
if end_level == -1:
self.backbone_end_level = self.num_ins
assert num_outs >= self.num_ins - start_level
else:
self.backbone_end_level = end_level
assert end_level <= len(in_channels)
assert num_outs == end_level - start_level
self.start_level = start_level
self.end_level = end_level
self.add_extra_convs = add_extra_convs
self.adapt_convs = nn.ModuleList()
for i in range(self.start_level, self.backbone_end_level):
adapt_conv = ConvModule(
in_channels[i],
out_channels,
1,
stride=1,
padding=0,
bias=False,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU', inplace=False))
self.adapt_convs.append(adapt_conv)
# C2 is omitted according to the paper
extra_levels = num_outs - self.backbone_end_level + self.start_level
def build_concat_cell(with_input1_conv, with_input2_conv):
cell_conv_cfg = dict(
kernel_size=1, padding=0, bias=False, groups=out_channels)
return ConcatCell(
in_channels=out_channels,
out_channels=out_channels,
with_out_conv=True,
out_conv_cfg=cell_conv_cfg,
out_norm_cfg=dict(type='BN'),
out_conv_order=('norm', 'act', 'conv'),
with_input1_conv=with_input1_conv,
with_input2_conv=with_input2_conv,
input_conv_cfg=conv_cfg,
input_norm_cfg=norm_cfg,
upsample_mode='nearest')
# Denote c3=f0, c4=f1, c5=f2 for convince
self.fpn = nn.ModuleDict()
self.fpn['c22_1'] = build_concat_cell(True, True)
self.fpn['c22_2'] = build_concat_cell(True, True)
self.fpn['c32'] = build_concat_cell(True, False)
self.fpn['c02'] = build_concat_cell(True, False)
self.fpn['c42'] = build_concat_cell(True, True)
self.fpn['c36'] = build_concat_cell(True, True)
self.fpn['c61'] = build_concat_cell(True, True) # f9
self.extra_downsamples = nn.ModuleList()
for i in range(extra_levels):
extra_act_cfg = None if i == 0 \
else dict(type='ReLU', inplace=False)
self.extra_downsamples.append(
ConvModule(
out_channels,
out_channels,
3,
stride=2,
padding=1,
act_cfg=extra_act_cfg,
order=('act', 'norm', 'conv')))
def forward(self, inputs):
"""Forward function."""
feats = [
adapt_conv(inputs[i + self.start_level])
for i, adapt_conv in enumerate(self.adapt_convs)
]
for (i, module_name) in enumerate(self.fpn):
idx_1, idx_2 = int(module_name[1]), int(module_name[2])
res = self.fpn[module_name](feats[idx_1], feats[idx_2])
feats.append(res)
ret = []
for (idx, input_idx) in zip([9, 8, 7], [1, 2, 3]): # add P3, P4, P5
feats1, feats2 = feats[idx], feats[5]
feats2_resize = F.interpolate(
feats2,
size=feats1.size()[2:],
mode='bilinear',
align_corners=False)
feats_sum = feats1 + feats2_resize
ret.append(
F.interpolate(
feats_sum,
size=inputs[input_idx].size()[2:],
mode='bilinear',
align_corners=False))
for submodule in self.extra_downsamples:
ret.append(submodule(ret[-1]))
return tuple(ret)
def init_weights(self):
"""Initialize the weights of module."""
for module in self.fpn.values():
if hasattr(module, 'conv_out'):
caffe2_xavier_init(module.out_conv.conv)
for modules in [
self.adapt_convs.modules(),
self.extra_downsamples.modules()
]:
for module in modules:
if isinstance(module, nn.Conv2d):
caffe2_xavier_init(module)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment