Internal change

PiperOrigin-RevId: 443422807

Internal change
PiperOrigin-RevId: 443422807
b526fb87 · A. Unique TensorFlower · 3c95b05e · b526fb87
Commit b526fb87 authored Apr 21, 2022 by A. Unique TensorFlower
Hide whitespace changes
Inline Side-by-side

Showing with 66 additions and 32 deletions

official/vision/ops/anchor.py official/vision/ops/anchor.py +66 -32

No files found.
--- a/official/vision/ops/anchor.py
+++ b/official/vision/ops/anchor.py
@@ -15,6 +15,7 @@
 """Anchor box and labeler definition."""

 import collections
+from typing import Dict, Optional, Tuple

 # Import libraries

@@ -65,7 +66,7 @@ class Anchor(object):
    self.image_size = image_size
    self.boxes = self._generate_boxes()

-  def _generate_boxes(self):
+  def _generate_boxes(self) -> tf.Tensor:
    """Generates multiscale anchor boxes.

    Returns:
@@ -100,7 +101,7 @@ class Anchor(object):
      boxes_all.append(boxes_l)
    return tf.concat(boxes_all, axis=0)

-  def unpack_labels(self, labels):
+  def unpack_labels(self, labels: tf.Tensor) -> Dict[str, tf.Tensor]:
    """Unpacks an array of labels into multiscales labels."""
    unpacked_labels = collections.OrderedDict()
    count = 0
@@ -146,17 +147,24 @@ class AnchorLabeler(object):
        force_match_for_each_col=True)
    self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

-  def label_anchors(self,
-                    anchor_boxes,
-                    gt_boxes,
-                    gt_labels,
-                    gt_attributes=None,
-                    gt_weights=None):
+  def label_anchors(
+      self,
+      anchor_boxes: Dict[str, tf.Tensor],
+      gt_boxes: tf.Tensor,
+      gt_labels: tf.Tensor,
+      gt_attributes: Optional[Dict[str, tf.Tensor]] = None,
+      gt_weights: Optional[tf.Tensor] = None
+  ) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor], Dict[str, Dict[
+      str, tf.Tensor]], tf.Tensor, tf.Tensor]:
    """Labels anchors with ground truth inputs.

    Args:
-      anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes.
-        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
+      anchor_boxes: An ordered dictionary with keys
+        [min_level, min_level+1, ..., max_level]. The values are tensor with
+        shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
+        and width_l represent the dimension of the feature pyramid at l-th
+        level. For each anchor box, the tensor stores [y0, x0, y1, x1] for the
+        four corners.
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
@@ -166,30 +174,29 @@ class AnchorLabeler(object):
        representing groundtruth attributes.
      gt_weights: If not None, a float tensor with shape [N] representing
        groundtruth weights.
+
    Returns:
-      cls_targets_dict: ordered dictionary with keys
+      cls_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors_per_location]. The height_l and
        width_l represent the dimension of class logits at l-th level.
-      box_targets_dict: ordered dictionary with keys
+      box_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
        and width_l represent the dimension of bounding box regression output at
        l-th level.
-      attribute_targets_dict: a dict with (name, attribute_targets) pairs. Each
+      attribute_targets_dict: A dict with (name, attribute_targets) pairs. Each
        `attribute_targets` represents an ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors_per_location * attribute_size].
        The height_l and width_l represent the dimension of attribute prediction
        output at l-th level.
-      cls_weights: A flattened Tensor with shape [batch_size, num_anchors], that
-        serves as masking / sample weight for classification loss. Its value
-        is 1.0 for positive and negative matched anchors, and 0.0 for ignored
-        anchors.
-      box_weights: A flattened Tensor with shape [batch_size, num_anchors], that
-        serves as masking / sample weight for regression loss. Its value is
-        1.0 for positive matched anchors, and 0.0 for negative and ignored
-        anchors.
+      cls_weights: A flattened Tensor with shape [num_anchors], that serves as
+        masking / sample weight for classification loss. Its value is 1.0 for
+        positive and negative matched anchors, and 0.0 for ignored anchors.
+      box_weights: A flattened Tensor with shape [num_anchors], that serves as
+        masking / sample weight for regression loss. Its value is 1.0 for
+        positive matched anchors, and 0.0 for negative and ignored anchors.
    """
    flattened_anchor_boxes = []
    for anchors in anchor_boxes.values():
@@ -286,25 +293,33 @@ class RpnAnchorLabeler(AnchorLabeler):
    return (ignore_labels + positive_labels + negative_labels,
            positive_labels, negative_labels)

-  def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
+  def label_anchors(
+      self, anchor_boxes: Dict[str, tf.Tensor], gt_boxes: tf.Tensor,
+      gt_labels: tf.Tensor
+  ) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor]]:
    """Labels anchors with ground truth inputs.

    Args:
-      anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes.
-        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
+      anchor_boxes: An ordered dictionary with keys
+        [min_level, min_level+1, ..., max_level]. The values are tensor with
+        shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
+        and width_l represent the dimension of the feature pyramid at l-th
+        level. For each anchor box, the tensor stores [y0, x0, y1, x1] for the
+        four corners.
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.
+
    Returns:
-      score_targets_dict: ordered dictionary with keys
+      score_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
-        shape [height_l, width_l, num_anchors]. The height_l and width_l
-        represent the dimension of class logits at l-th level.
-      box_targets_dict: ordered dictionary with keys
+        shape [height_l, width_l, num_anchors_per_location]. The height_l and
+        width_l represent the dimension of class logits at l-th level.
+      box_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
-        shape [height_l, width_l, num_anchors * 4]. The height_l and
-        width_l represent the dimension of bounding box regression output at
+        shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
+        and width_l represent the dimension of bounding box regression output at
        l-th level.
    """
    flattened_anchor_boxes = []
@@ -362,8 +377,27 @@ def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios,
  return anchor_gen


-def unpack_targets(targets, anchor_boxes_dict):
-  """Unpacks an array of labels into multiscales labels."""
+def unpack_targets(
+    targets: tf.Tensor,
+    anchor_boxes_dict: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
+  """Unpacks an array of labels into multiscales labels.
+
+  Args:
+    targets: A tensor with shape [num_anchors, M] representing the packed
+      targets with M values stored for each anchor.
+    anchor_boxes_dict: An ordered dictionary with keys
+      [min_level, min_level+1, ..., max_level]. The values are tensor with shape
+      [height_l, width_l, num_anchors_per_location * 4]. The height_l and
+      width_l represent the dimension of the feature pyramid at l-th level. For
+      each anchor box, the tensor stores [y0, x0, y1, x1] for the four corners.
+
+  Returns:
+    unpacked_targets: An ordered dictionary with keys
+      [min_level, min_level+1, ..., max_level]. The values are tensor with shape
+      [height_l, width_l, num_anchors_per_location * M]. The height_l and
+      width_l represent the dimension of the feature pyramid at l-th level. M is
+      the number of values stored for each anchor.
+  """
  unpacked_targets = collections.OrderedDict()
  count = 0
  for level, anchor_boxes in anchor_boxes_dict.items():