from typing import List

import torch
from torch import Tensor

from mmdet.models.dense_heads import ATSSHead
from mmdet.models.utils import images_to_levels, multi_apply
from mmdet.registry import MODELS
from mmdet.utils import InstanceList, OptInstanceList, reduce_mean


@MODELS.register_module()
class CoATSSHead(ATSSHead):

    def loss_by_feat(
            self,
            cls_scores: List[Tensor],
            bbox_preds: List[Tensor],
            centernesses: List[Tensor],
            batch_gt_instances: InstanceList,
            batch_img_metas: List[dict],
            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
        """Calculate the loss based on the features extracted by the detection
        head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
                Has shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            centernesses (list[Tensor]): Centerness for each scale level with
                shape (N, num_anchors * 1, H, W).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.prior_generator.num_levels

        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, batch_img_metas, device=device)

        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore=batch_gt_instances_ignore)

        (anchor_list, labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, avg_factor, ori_anchors, ori_labels,
         ori_bbox_targets) = cls_reg_targets
        avg_factor = reduce_mean(
            torch.tensor(avg_factor, dtype=torch.float, device=device)).item()

        losses_cls, losses_bbox, loss_centerness, \
            bbox_avg_factor = multi_apply(
                self.loss_by_feat_single,
                anchor_list,
                cls_scores,
                bbox_preds,
                centernesses,
                labels_list,
                label_weights_list,
                bbox_targets_list,
                avg_factor=avg_factor)

        bbox_avg_factor = sum(bbox_avg_factor)
        bbox_avg_factor = reduce_mean(bbox_avg_factor).clamp_(min=1).item()
        losses_bbox = list(map(lambda x: x / bbox_avg_factor, losses_bbox))
        # diff
        pos_coords = (ori_anchors, ori_labels, ori_bbox_targets, 'atss')
        return dict(
            loss_cls=losses_cls,
            loss_bbox=losses_bbox,
            loss_centerness=loss_centerness,
            pos_coords=pos_coords)

    def get_targets(self,
                    anchor_list: List[List[Tensor]],
                    valid_flag_list: List[List[Tensor]],
                    batch_gt_instances: InstanceList,
                    batch_img_metas: List[dict],
                    batch_gt_instances_ignore: OptInstanceList = None,
                    unmap_outputs: bool = True) -> tuple:
        """Get targets for ATSS head.

        This method is almost the same as `AnchorHead.get_targets()`. Besides
        returning the targets as the parent method does, it also returns the
        anchors as the first element of the returned tuple.
""" num_imgs = len(batch_img_metas) assert len(anchor_list) == len(valid_flag_list) == num_imgs # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] num_level_anchors_list = [num_level_anchors] * num_imgs # concat all level anchors and flags to a single tensor for i in range(num_imgs): assert len(anchor_list[i]) == len(valid_flag_list[i]) anchor_list[i] = torch.cat(anchor_list[i]) valid_flag_list[i] = torch.cat(valid_flag_list[i]) # compute targets for each image if batch_gt_instances_ignore is None: batch_gt_instances_ignore = [None] * num_imgs (all_anchors, all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, pos_inds_list, neg_inds_list, sampling_results_list) = multi_apply( self._get_targets_single, anchor_list, valid_flag_list, num_level_anchors_list, batch_gt_instances, batch_img_metas, batch_gt_instances_ignore, unmap_outputs=unmap_outputs) # Get `avg_factor` of all images, which calculate in `SamplingResult`. # When using sampling method, avg_factor is usually the sum of # positive and negative priors. When using `PseudoSampler`, # `avg_factor` is usually equal to the number of positive priors. avg_factor = sum( [results.avg_factor for results in sampling_results_list]) # split targets to a list w.r.t. multiple levels anchors_list = images_to_levels(all_anchors, num_level_anchors) labels_list = images_to_levels(all_labels, num_level_anchors) label_weights_list = images_to_levels(all_label_weights, num_level_anchors) bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors) bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors) # diff ori_anchors = all_anchors ori_labels = all_labels ori_bbox_targets = all_bbox_targets return (anchors_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, avg_factor, ori_anchors, ori_labels, ori_bbox_targets)