"vscode:/vscode.git/clone" did not exist on "30ba39c9ade866feea982ff1992bf808195b7bc3"
Commit b2b8e216 authored by Zhicheng Yan, committed by Facebook GitHub Bot

fix a bug at inference and refactor code

Summary:
Pull Request resolved: https://github.com/facebookresearch/d2go/pull/97

Major changes:
- Fix a bug in the `inference()` function (see the sketch below)
- Refactor to remove code duplicated between `SetCriterion` and `FocalLossSetCriterion` (a skeleton of the resulting structure follows the diff).
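As a reading aid for the first change: the old inference path gathered top-k boxes through an artificial batch dimension plus a trailing `squeeze()`, while the fix indexes the per-image `topk_boxes[i]` and gathers along dim 0. A minimal standalone sketch of the fixed pattern (shapes and variable names here are illustrative, not the actual `detr.py` ones):

```python
import torch

num_queries, k = 100, 5
boxes = torch.rand(num_queries, 4)   # one image's boxes, shape (num_queries, 4)
scores = torch.rand(num_queries)
topk_idx = scores.topk(k).indices    # this image's top-k query indices, shape (k,)

# Fixed pattern: gather along dim 0 with per-image indices.
# index has shape (k, 4), so out[i][j] = boxes[topk_idx[i]][j].
picked = torch.gather(boxes, 0, topk_idx.unsqueeze(-1).repeat(1, 4))
assert torch.equal(picked, boxes[topk_idx])  # same result as fancy indexing

# Old pattern (buggy):
#   torch.gather(boxes.unsqueeze(0), 1,
#                topk_boxes.unsqueeze(-1).repeat(1, 1, 4)).squeeze()
# paired one image's boxes with the whole batch's index tensor, and the
# bare squeeze() also drops the box dimension whenever k == 1.
```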

Reviewed By: zhanghang1989

Differential Revision: D29481067

fbshipit-source-id: 64788f1ff331177db964eb36d380430799d1d2f2
parent e830629a
@@ -382,11 +382,9 @@ class Detr(nn.Module):
             result = Instances(image_size)
             boxes = box_cxcywh_to_xyxy(box_pred_per_image)
             if self.use_focal_loss:
-                boxes = torch.gather(
-                    boxes.unsqueeze(0), 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4)
-                ).squeeze()
+                boxes = torch.gather(boxes, 0, topk_boxes[i].unsqueeze(-1).repeat(1, 4))
             result.pred_boxes = Boxes(boxes)
             result.pred_boxes.scale(scale_x=image_size[1], scale_y=image_size[0])
             if self.mask_on:
                 mask = F.interpolate(
...
 import copy
 import torch
 import torch.nn.functional as F
 from torch import nn
 from ..util import box_ops
-from ..util.misc import (nested_tensor_from_tensor_list,
-                         accuracy, get_world_size, interpolate,
-                         is_dist_avail_and_initialized)
+from ..util.misc import (
+    nested_tensor_from_tensor_list,
+    accuracy,
+    get_world_size,
+    interpolate,
+    is_dist_avail_and_initialized,
+)
 from .segmentation import dice_loss, sigmoid_focal_loss


 class SetCriterion(nn.Module):
-    """ This class computes the loss for DETR.
+    """This class computes the loss for DETR.
     The process happens in two steps:
         1) we compute hungarian assignment between ground truth boxes and the outputs of the model
         2) we supervise each pair of matched ground-truth / prediction (supervise class and box)
     """

     def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
-        """ Create the criterion.
+        """Create the criterion.
         Parameters:
             num_classes: number of object categories, omitting the special no-object category
             matcher: module able to compute a matching between targets and proposals
@@ -32,15 +39,15 @@ class SetCriterion(nn.Module):
         self.losses = losses
         empty_weight = torch.ones(self.num_classes + 1)
         empty_weight[-1] = self.eos_coef
-        self.register_buffer('empty_weight', empty_weight)
+        self.register_buffer("empty_weight", empty_weight)

     def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
         """Classification loss (NLL)
         targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
         """
-        assert 'pred_logits' in outputs
+        assert "pred_logits" in outputs
         # shape (batch_size, num_queries, NUM_CLASS + 1)
-        src_logits = outputs['pred_logits']
+        src_logits = outputs["pred_logits"]

         # idx = (batch_idx, src_idx)
         # batch_idx shape [\sum_b num_match_b]
         # src_idx shape [\sum_b num_match_b]
@@ -49,60 +56,75 @@ class SetCriterion(nn.Module):
         # "labels": [NUM_BOX,]
         # "boxes": [NUM_BOX, 4]
         # target_classes_o shape [batch_size * num_match]
-        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
+        target_classes_o = torch.cat(
+            [t["labels"][J] for t, (_, J) in zip(targets, indices)]
+        )
         # shape (batch_size, num_queries)
-        target_classes = torch.full(src_logits.shape[:2], self.num_classes,
-                                    dtype=torch.int64, device=src_logits.device)
+        target_classes = torch.full(
+            src_logits.shape[:2],
+            self.num_classes,
+            dtype=torch.int64,
+            device=src_logits.device,
+        )
         target_classes[idx] = target_classes_o

-        loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight)
-        losses = {'loss_ce': loss_ce}
+        loss_ce = F.cross_entropy(
+            src_logits.transpose(1, 2), target_classes, self.empty_weight
+        )
+        losses = {"loss_ce": loss_ce}

         if log:
             # TODO this should probably be a separate loss, not hacked in this one here
-            losses['class_error'] = 100 - accuracy(src_logits[idx], target_classes_o)[0]
+            losses["class_error"] = 100 - accuracy(src_logits[idx], target_classes_o)[0]
         return losses

     @torch.no_grad()
     def loss_cardinality(self, outputs, targets, indices, num_boxes):
-        """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes
+        """Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes
         This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients
         """
-        pred_logits = outputs['pred_logits']
+        pred_logits = outputs["pred_logits"]
         device = pred_logits.device
-        tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device)
+        tgt_lengths = torch.as_tensor(
+            [len(v["labels"]) for v in targets], device=device
+        )
         # Count the number of predictions that are NOT "no-object" (which is the last class)
         card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1)
         card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
-        losses = {'cardinality_error': card_err}
+        losses = {"cardinality_error": card_err}
         return losses

     def loss_boxes(self, outputs, targets, indices, num_boxes):
         """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
         targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
         The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
         """
-        assert 'pred_boxes' in outputs
+        assert "pred_boxes" in outputs
         idx = self._get_src_permutation_idx(indices)
         # shape [\sum_b num_matches_b, 4]
-        src_boxes = outputs['pred_boxes'][idx]
+        src_boxes = outputs["pred_boxes"][idx]
         # shape [\sum_b num_matches_b, 4]
-        target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)
+        target_boxes = torch.cat(
+            [t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0
+        )

-        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')
+        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none")

         losses = {}
-        losses['loss_bbox'] = loss_bbox.sum() / num_boxes
+        losses["loss_bbox"] = loss_bbox.sum() / num_boxes

-        loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(
-            box_ops.box_cxcywh_to_xyxy(src_boxes),
-            box_ops.box_cxcywh_to_xyxy(target_boxes)))
-        losses['loss_giou'] = loss_giou.sum() / num_boxes
+        loss_giou = 1 - torch.diag(
+            box_ops.generalized_box_iou(
+                box_ops.box_cxcywh_to_xyxy(src_boxes),
+                box_ops.box_cxcywh_to_xyxy(target_boxes),
+            )
+        )
+        losses["loss_giou"] = loss_giou.sum() / num_boxes
         return losses
     def loss_masks(self, outputs, targets, indices, num_boxes):
         """Compute the losses related to the masks: the focal loss and the dice loss.
         targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w]
         """
         assert "pred_masks" in outputs

@@ -117,8 +139,12 @@ class SetCriterion(nn.Module):
         target_masks = target_masks[tgt_idx]

         # upsample predictions to the target size
-        src_masks = interpolate(src_masks[:, None], size=target_masks.shape[-2:],
-                                mode="bilinear", align_corners=False)
+        src_masks = interpolate(
+            src_masks[:, None],
+            size=target_masks.shape[-2:],
+            mode="bilinear",
+            align_corners=False,
+        )
         src_masks = src_masks[:, 0].flatten(1)

         target_masks = target_masks.flatten(1)
@@ -131,36 +157,31 @@ class SetCriterion(nn.Module):
     def _get_src_permutation_idx(self, indices):
         # permute predictions following indices
-        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])  # shape [\sum_b num_match_b]
-        src_idx = torch.cat([src for (src, _) in indices])  # shape [\sum_b num_match_b]
+        batch_idx = torch.cat(
+            [torch.full_like(src, i) for i, (src, _) in enumerate(indices)]
+        )  # shape [\sum_b num_match_b]
+        src_idx = torch.cat([src for (src, _) in indices])  # shape [\sum_b num_match_b]
         return batch_idx, src_idx

     def _get_tgt_permutation_idx(self, indices):
         # permute targets following indices
-        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])  # shape [\sum_b num_match_b]
+        batch_idx = torch.cat(
+            [torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]
+        )  # shape [\sum_b num_match_b]
         tgt_idx = torch.cat([tgt for (_, tgt) in indices])  # shape [\sum_b num_match_b]
         return batch_idx, tgt_idx

     def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
         loss_map = {
-            'labels': self.loss_labels,
-            'cardinality': self.loss_cardinality,
-            'boxes': self.loss_boxes,
-            'masks': self.loss_masks
+            "labels": self.loss_labels,
+            "cardinality": self.loss_cardinality,
+            "boxes": self.loss_boxes,
+            "masks": self.loss_masks,
         }
-        assert loss in loss_map, f'do you really want to compute {loss} loss?'
+        assert loss in loss_map, f"do you really want to compute {loss} loss?"
         return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)

-    def forward(self, outputs, targets):
-        """ This performs the loss computation.
-        Parameters:
-             outputs: dict of tensors, see the output specification of the model for the format
-             targets: list of dicts, such that len(targets) == batch_size.
-                      The expected keys in each dict depends on the losses applied, see each loss' doc
-        """
-        # "pred_logits" shape (B, S, NUM_CLASS + 1)
-        # "pred_boxes" shape (B, S, 4)
-        outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}
+    def _forward(self, outputs, outputs_without_aux, targets):
         # Retrieve the matching between the outputs of the last layer and the targets
         # A list where each item is [row_indices, col_indices]
@@ -168,7 +189,9 @@ class SetCriterion(nn.Module):
         # Compute the average number of target boxes accross all nodes, for normalization purposes
         num_boxes = sum(len(t["labels"]) for t in targets)
-        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
+        num_boxes = torch.as_tensor(
+            [num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device
+        )
         if is_dist_avail_and_initialized():
             torch.distributed.all_reduce(num_boxes)
         num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
@@ -179,32 +202,46 @@ class SetCriterion(nn.Module):
             losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes))

         # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
-        if 'aux_outputs' in outputs:
-            for i, aux_outputs in enumerate(outputs['aux_outputs']):
+        if "aux_outputs" in outputs:
+            for i, aux_outputs in enumerate(outputs["aux_outputs"]):
                 indices = self.matcher(aux_outputs, targets)
                 for loss in self.losses:
-                    if loss == 'masks':
+                    if loss == "masks":
                         # Intermediate masks losses are too costly to compute, we ignore them.
                         continue
                     kwargs = {}
-                    if loss == 'labels':
+                    if loss == "labels":
                         # Logging is enabled only for the last layer
-                        kwargs = {'log': False}
-                    l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
-                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
+                        kwargs = {"log": False}
+                    l_dict = self.get_loss(
+                        loss, aux_outputs, targets, indices, num_boxes, **kwargs
+                    )
+                    l_dict = {k + f"_{i}": v for k, v in l_dict.items()}
                     losses.update(l_dict)

         return losses

+    def forward(self, outputs, targets):
+        """This performs the loss computation.
+        Parameters:
+            outputs: dict of tensors, see the output specification of the model for the format
+            targets: list of dicts, such that len(targets) == batch_size.
+                     The expected keys in each dict depends on the losses applied, see each loss' doc
+        """
+        # "pred_logits" shape (B, S, NUM_CLASS + 1)
+        # "pred_boxes" shape (B, S, 4)
+        outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"}
+        return self._forward(outputs, outputs_without_aux, targets)
+

-class FocalLossSetCriterion(nn.Module):
-    """ This class computes the loss for DETR.
+class FocalLossSetCriterion(SetCriterion):
+    """This class computes the loss for DETR.
     The process happens in two steps:
         1) we compute hungarian assignment between ground truth boxes and the outputs of the model
         2) we supervise each pair of matched ground-truth / prediction (supervise class and box)
     """

     def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25):
-        """ Create the criterion.
+        """Create the criterion.
         Parameters:
             num_classes: number of object categories, omitting the special no-object category
             matcher: module able to compute a matching between targets and proposals
@@ -212,182 +249,89 @@ class FocalLossSetCriterion(nn.Module):
             losses: list of all the losses to be applied. See get_loss for list of available losses.
             focal_alpha: alpha in Focal Loss
         """
-        super().__init__()
-        self.num_classes = num_classes
-        self.matcher = matcher
-        self.weight_dict = weight_dict
-        self.losses = losses
+        super().__init__(num_classes, matcher, weight_dict, 0, losses)
         self.focal_alpha = focal_alpha

     def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
         """Classification loss (NLL)
         targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
         """
-        assert 'pred_logits' in outputs
-        src_logits = outputs['pred_logits']
+        assert "pred_logits" in outputs
+        src_logits = outputs["pred_logits"]

         idx = self._get_src_permutation_idx(indices)
-        target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
-        target_classes = torch.full(src_logits.shape[:2], self.num_classes,
-                                    dtype=torch.int64, device=src_logits.device)
+        target_classes_o = torch.cat(
+            [t["labels"][J] for t, (_, J) in zip(targets, indices)]
+        )
+        target_classes = torch.full(
+            src_logits.shape[:2],
+            self.num_classes,
+            dtype=torch.int64,
+            device=src_logits.device,
+        )
         target_classes[idx] = target_classes_o

-        target_classes_onehot = torch.zeros([src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1],
-                                            dtype=src_logits.dtype, layout=src_logits.layout, device=src_logits.device)
+        target_classes_onehot = torch.zeros(
+            [src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1],
+            dtype=src_logits.dtype,
+            layout=src_logits.layout,
+            device=src_logits.device,
+        )
         target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1)

-        target_classes_onehot = target_classes_onehot[:,:,:-1]
-        loss_ce = sigmoid_focal_loss(src_logits, target_classes_onehot, num_boxes, alpha=self.focal_alpha, gamma=2) * src_logits.shape[1]
-        losses = {'loss_ce': loss_ce}
+        target_classes_onehot = target_classes_onehot[:, :, :-1]
+        loss_ce = (
+            sigmoid_focal_loss(
+                src_logits,
+                target_classes_onehot,
+                num_boxes,
+                alpha=self.focal_alpha,
+                gamma=2,
+            )
+            * src_logits.shape[1]
+        )
+        losses = {"loss_ce": loss_ce}

         if log:
             # TODO this should probably be a separate loss, not hacked in this one here
-            losses['class_error'] = 100 - accuracy(src_logits[idx], target_classes_o)[0]
+            losses["class_error"] = 100 - accuracy(src_logits[idx], target_classes_o)[0]
-        return losses
-
-    @torch.no_grad()
-    def loss_cardinality(self, outputs, targets, indices, num_boxes):
-        """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes
-        This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients
-        """
-        pred_logits = outputs['pred_logits']
-        device = pred_logits.device
-        tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device)
-        # Count the number of predictions that are NOT "no-object" (which is the last class)
-        card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1)
-        card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
-        losses = {'cardinality_error': card_err}
-        return losses
-
-    def loss_boxes(self, outputs, targets, indices, num_boxes):
-        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
-        targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
-        The target boxes are expected in format (center_x, center_y, h, w), normalized by the image size.
-        """
-        assert 'pred_boxes' in outputs
-        idx = self._get_src_permutation_idx(indices)
-        src_boxes = outputs['pred_boxes'][idx]
-        target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)
-
-        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')
-
-        losses = {}
-        losses['loss_bbox'] = loss_bbox.sum() / num_boxes
-
-        loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(
-            box_ops.box_cxcywh_to_xyxy(src_boxes),
-            box_ops.box_cxcywh_to_xyxy(target_boxes)))
-        losses['loss_giou'] = loss_giou.sum() / num_boxes
-        return losses
-
-    def loss_masks(self, outputs, targets, indices, num_boxes):
-        """Compute the losses related to the masks: the focal loss and the dice loss.
-        targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w]
-        """
-        assert "pred_masks" in outputs
-
-        src_idx = self._get_src_permutation_idx(indices)
-        tgt_idx = self._get_tgt_permutation_idx(indices)
-        src_masks = outputs["pred_masks"]
-
-        # TODO use valid to mask invalid areas due to padding in loss
-        target_masks, valid = nested_tensor_from_tensor_list([t["masks"] for t in targets]).decompose()
-        target_masks = target_masks.to(src_masks)
-
-        src_masks = src_masks[src_idx]
-        # upsample predictions to the target size
-        src_masks = interpolate(src_masks[:, None], size=target_masks.shape[-2:],
-                                mode="bilinear", align_corners=False)
-        src_masks = src_masks[:, 0].flatten(1)
-
-        target_masks = target_masks[tgt_idx].flatten(1)
-
-        losses = {
-            "loss_mask": sigmoid_focal_loss(src_masks, target_masks, num_boxes),
-            "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
-        }
         return losses

-    def _get_src_permutation_idx(self, indices):
-        # permute predictions following indices
-        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
-        src_idx = torch.cat([src for (src, _) in indices])
-        return batch_idx, src_idx
-
-    def _get_tgt_permutation_idx(self, indices):
-        # permute targets following indices
-        batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
-        tgt_idx = torch.cat([tgt for (_, tgt) in indices])
-        return batch_idx, tgt_idx
-
-    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
-        loss_map = {
-            'labels': self.loss_labels,
-            'cardinality': self.loss_cardinality,
-            'boxes': self.loss_boxes,
-            'masks': self.loss_masks
-        }
-        assert loss in loss_map, f'do you really want to compute {loss} loss?'
-        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
     def forward(self, outputs, targets):
-        """ This performs the loss computation.
+        """This performs the loss computation.
         Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depends on the losses applied, see each loss' doc
         """
-        outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs' and k != 'enc_outputs'}
-
-        # Retrieve the matching between the outputs of the last layer and the targets
-        indices = self.matcher(outputs_without_aux, targets)
-
-        # Compute the average number of target boxes accross all nodes, for normalization purposes
-        num_boxes = sum(len(t["labels"]) for t in targets)
-        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
-        if is_dist_avail_and_initialized():
-            torch.distributed.all_reduce(num_boxes)
-        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
-
-        # Compute all the requested losses
-        losses = {}
-        for loss in self.losses:
-            kwargs = {}
-            losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes, **kwargs))
-
-        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
-        if 'aux_outputs' in outputs:
-            for i, aux_outputs in enumerate(outputs['aux_outputs']):
-                indices = self.matcher(aux_outputs, targets)
-                for loss in self.losses:
-                    if loss == 'masks':
-                        # Intermediate masks losses are too costly to compute, we ignore them.
-                        continue
-                    kwargs = {}
-                    if loss == 'labels':
-                        # Logging is enabled only for the last layer
-                        kwargs['log'] = False
-                    l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
-                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
-                    losses.update(l_dict)
-
-        if 'enc_outputs' in outputs:
-            enc_outputs = outputs['enc_outputs']
+        outputs_without_aux = {
+            k: v
+            for k, v in outputs.items()
+            if k != "aux_outputs" and k != "enc_outputs"
+        }
+        losses = self._forward(outputs, outputs_without_aux, targets)
+
+        if "enc_outputs" in outputs:
+            num_boxes = sum(len(t["labels"]) for t in targets)
+            enc_outputs = outputs["enc_outputs"]
             bin_targets = copy.deepcopy(targets)
             for bt in bin_targets:
-                bt['labels'] = torch.zeros_like(bt['labels'])
+                bt["labels"] = torch.zeros_like(bt["labels"])
             indices = self.matcher(enc_outputs, bin_targets)
             for loss in self.losses:
-                if loss == 'masks':
+                if loss == "masks":
                     # Intermediate masks losses are too costly to compute, we ignore them.
                     continue
                 kwargs = {}
-                if loss == 'labels':
+                if loss == "labels":
                     # Logging is enabled only for the last layer
-                    kwargs['log'] = False
-                l_dict = self.get_loss(loss, enc_outputs, bin_targets, indices, num_boxes, **kwargs)
-                l_dict = {k + f'_enc': v for k, v in l_dict.items()}
+                    kwargs["log"] = False
+                l_dict = self.get_loss(
+                    loss, enc_outputs, bin_targets, indices, num_boxes, **kwargs
+                )
+                l_dict = {k + "_enc": v for k, v in l_dict.items()}
                 losses.update(l_dict)

         return losses
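For the second change, a rough skeleton of the class structure this diff produces (method bodies elided; a reading aid, not the verbatim file):

```python
import torch.nn as nn


class SetCriterion(nn.Module):
    """Holds the shared machinery: matching, loss dispatch, aux-layer losses."""

    def _forward(self, outputs, outputs_without_aux, targets):
        ...  # match outputs to targets, normalize num_boxes, compute losses

    def forward(self, outputs, targets):
        outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"}
        return self._forward(outputs, outputs_without_aux, targets)


class FocalLossSetCriterion(SetCriterion):
    def __init__(self, num_classes, matcher, weight_dict, losses, focal_alpha=0.25):
        # eos_coef is passed as 0; the overridden loss_labels never uses empty_weight
        super().__init__(num_classes, matcher, weight_dict, 0, losses)
        self.focal_alpha = focal_alpha

    def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
        ...  # sigmoid focal loss instead of weighted cross-entropy

    def forward(self, outputs, targets):
        outputs_without_aux = {
            k: v for k, v in outputs.items() if k not in ("aux_outputs", "enc_outputs")
        }
        losses = self._forward(outputs, outputs_without_aux, targets)
        # ...plus the extra "enc_outputs" pass shown in the diff above
        return losses
```

`loss_cardinality`, `loss_boxes`, `loss_masks`, `get_loss`, and the permutation helpers are now inherited from `SetCriterion`, which is what the -182/+89 hunk above amounts to.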