# TODO merge naive and weighted loss.
import numpy as np
import torch
import torch.nn.functional as F

from ..bbox import bbox_overlaps
from ...ops import sigmoid_focal_loss


def weighted_nll_loss(pred, label, weight, avg_factor=None):
    if avg_factor is None:
        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
    raw = F.nll_loss(pred, label, reduction='none')
    return torch.sum(raw * weight)[None] / avg_factor


def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True):
    if avg_factor is None:
        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
    raw = F.cross_entropy(pred, label, reduction='none')
    if reduce:
        return torch.sum(raw * weight)[None] / avg_factor
    else:
        return raw * weight / avg_factor


def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
    if pred.dim() != label.dim():
        label, weight = _expand_binary_labels(label, weight, pred.size(-1))
    if avg_factor is None:
        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
    return F.binary_cross_entropy_with_logits(
        pred, label.float(), weight.float(),
        reduction='sum')[None] / avg_factor


def py_sigmoid_focal_loss(pred,
                          target,
                          weight,
                          gamma=2.0,
                          alpha=0.25,
                          reduction='mean'):
    pred_sigmoid = pred.sigmoid()
    target = target.type_as(pred)
    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
    weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
    weight = weight * pt.pow(gamma)
    loss = F.binary_cross_entropy_with_logits(
        pred, target, reduction='none') * weight
    reduction_enum = F._Reduction.get_enum(reduction)
    # none: 0, mean: 1, sum: 2
    if reduction_enum == 0:
        return loss
    elif reduction_enum == 1:
        return loss.mean()
    elif reduction_enum == 2:
        return loss.sum()


def weighted_sigmoid_focal_loss(pred,
                                target,
                                weight,
                                gamma=2.0,
                                alpha=0.25,
                                avg_factor=None,
                                num_classes=80):
    if avg_factor is None:
        avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
    return torch.sum(
        sigmoid_focal_loss(pred, target, gamma, alpha, 'none') *
        weight.view(-1, 1))[None] / avg_factor


def mask_cross_entropy(pred, target, label):
    num_rois = pred.size()[0]
    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
    pred_slice = pred[inds, label].squeeze(1)
    return F.binary_cross_entropy_with_logits(
        pred_slice, target, reduction='mean')[None]


def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'):
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                       diff - 0.5 * beta)
    reduction_enum = F._Reduction.get_enum(reduction)
    # none: 0, mean: 1, sum: 2
    if reduction_enum == 0:
        return loss
    elif reduction_enum == 1:
        return loss.sum() / pred.numel()
    elif reduction_enum == 2:
        return loss.sum()


def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
    if avg_factor is None:
        avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
    loss = smooth_l1_loss(pred, target, beta, reduction='none')
    return torch.sum(loss * weight)[None] / avg_factor


def balanced_l1_loss(pred,
                     target,
                     beta=1.0,
                     alpha=0.5,
                     gamma=1.5,
                     reduction='none'):
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0
    diff = torch.abs(pred - target)
    b = np.e**(gamma / alpha) - 1
    loss = torch.where(
        diff < beta, alpha / b *
        (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
        gamma * diff + gamma / b - alpha * beta)
    reduction_enum = F._Reduction.get_enum(reduction)
    # none: 0, mean: 1, sum: 2
    if reduction_enum == 0:
        return loss
    elif reduction_enum == 1:
        return loss.sum() / pred.numel()
    elif reduction_enum == 2:
        return loss.sum()
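
# The constant b = e^(gamma / alpha) - 1 in balanced_l1_loss is chosen so the
# two branches join continuously at diff == beta: substituting diff = beta
# into the first branch and using log(b + 1) = gamma / alpha gives
#     alpha / b * (b * beta + 1) * (gamma / alpha) - alpha * beta
#   = gamma * beta + gamma / b - alpha * beta,
# which is exactly the second branch evaluated at diff = beta. A quick
# numerical check of this (illustrative only, not part of the library API):
#
#     beta = 1.0
#     pred = torch.zeros(4)
#     below = balanced_l1_loss(pred, pred + beta - 1e-4, beta=beta)
#     above = balanced_l1_loss(pred, pred + beta + 1e-4, beta=beta)
#     assert torch.allclose(below, above, atol=1e-3)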
def weighted_balanced_l1_loss(pred,
                              target,
                              weight,
                              beta=1.0,
                              alpha=0.5,
                              gamma=1.5,
                              avg_factor=None):
    if avg_factor is None:
        avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
    loss = balanced_l1_loss(pred, target, beta, alpha, gamma,
                            reduction='none')
    return torch.sum(loss * weight)[None] / avg_factor


def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3, reduction='mean'):
    """Improving Object Localization with Fitness NMS and Bounded IoU Loss,
    https://arxiv.org/abs/1711.00164.

    Args:
        pred (tensor): Predicted bboxes.
        target (tensor): Target bboxes.
        beta (float): beta parameter of the smooth L1 term.
        eps (float): eps to avoid NaN.
        reduction (str): Reduction type.
    """
    pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5
    pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5
    pred_w = pred[:, 2] - pred[:, 0] + 1
    pred_h = pred[:, 3] - pred[:, 1] + 1
    with torch.no_grad():
        target_ctrx = (target[:, 0] + target[:, 2]) * 0.5
        target_ctry = (target[:, 1] + target[:, 3]) * 0.5
        target_w = target[:, 2] - target[:, 0] + 1
        target_h = target[:, 3] - target[:, 1] + 1

    dx = target_ctrx - pred_ctrx
    dy = target_ctry - pred_ctry

    loss_dx = 1 - torch.max(
        (target_w - 2 * dx.abs()) / (target_w + 2 * dx.abs() + eps),
        torch.zeros_like(dx))
    loss_dy = 1 - torch.max(
        (target_h - 2 * dy.abs()) / (target_h + 2 * dy.abs() + eps),
        torch.zeros_like(dy))
    loss_dw = 1 - torch.min(target_w / (pred_w + eps),
                            pred_w / (target_w + eps))
    loss_dh = 1 - torch.min(target_h / (pred_h + eps),
                            pred_h / (target_h + eps))
    loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh],
                            dim=-1).view(loss_dx.size(0), -1)

    loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta,
                       loss_comb - 0.5 * beta)
    reduction_enum = F._Reduction.get_enum(reduction)
    # none: 0, mean: 1, sum: 2
    if reduction_enum == 0:
        return loss
    elif reduction_enum == 1:
        return loss.sum() / pred.numel()
    elif reduction_enum == 2:
        return loss.sum()


def weighted_iou_loss(pred,
                      target,
                      weight,
                      style='naive',
                      beta=0.2,
                      eps=1e-3,
                      avg_factor=None):
    if style not in ['bounded', 'naive']:
        raise ValueError('Only support bounded iou loss and naive iou loss.')
    inds = torch.nonzero(weight[:, 0] > 0)
    if avg_factor is None:
        avg_factor = inds.numel() + 1e-6

    if inds.numel() > 0:
        inds = inds.squeeze(1)
    else:
        return (pred * weight).sum()[None] / avg_factor

    if style == 'bounded':
        loss = bounded_iou_loss(
            pred[inds], target[inds], beta=beta, eps=eps, reduction='sum')
    else:
        loss = iou_loss(pred[inds], target[inds], eps=eps, reduction='sum')
    loss = loss[None] / avg_factor
    return loss


def accuracy(pred, target, topk=1):
    if isinstance(topk, int):
        topk = (topk, )
        return_single = True
    else:
        return_single = False

    maxk = max(topk)
    _, pred_label = pred.topk(maxk, 1, True, True)
    pred_label = pred_label.t()
    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / pred.size(0)))
    return res[0] if return_single else res


def _expand_binary_labels(labels, label_weights, label_channels):
    bin_labels = labels.new_full((labels.size(0), label_channels), 0)
    inds = torch.nonzero(labels >= 1).squeeze()
    if inds.numel() > 0:
        bin_labels[inds, labels[inds] - 1] = 1
    bin_label_weights = label_weights.view(-1, 1).expand(
        label_weights.size(0), label_channels)
    return bin_labels, bin_label_weights
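
# Illustrative example for _expand_binary_labels (hypothetical values): with
# labels = [2, 0, 1] and label_channels = 3, label 0 is treated as background
# and class k maps to channel k - 1, so the expanded binary targets are
#     [[0, 1, 0],
#      [0, 0, 0],
#      [1, 0, 0]]
# while each row of bin_label_weights repeats that sample's original weight
# across all label_channels columns.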
def iou_loss(pred_bboxes, target_bboxes, eps=1e-6, reduction='mean'):
    ious = bbox_overlaps(pred_bboxes, target_bboxes, is_aligned=True) + eps
    loss = -ious.log()
    reduction_enum = F._Reduction.get_enum(reduction)
    if reduction_enum == 0:
        return loss
    elif reduction_enum == 1:
        return loss.mean()
    elif reduction_enum == 2:
        return loss.sum()
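
# Minimal usage sketch (assuming this module is imported as part of the
# package; the relative imports above prevent running this file directly).
# Shapes and values below are illustrative only, not taken from any config:
#
#     pred = torch.randn(8, 4)            # predicted bbox deltas
#     target = torch.randn(8, 4)          # regression targets
#     weight = torch.ones(8, 4)           # per-element weights
#     loss_bbox = weighted_smoothl1(pred, target, weight)
#
#     cls_score = torch.randn(8, 81)      # classification logits
#     labels = torch.randint(0, 81, (8, ))
#     label_weights = torch.ones(8)
#     loss_cls = weighted_cross_entropy(cls_score, labels, label_weights)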