target assigner mods

0006ba72 · Kaushik Shivakumar · 31ca3b97 · 0006ba72
Commit 0006ba72 authored Jul 24, 2020 by Kaushik Shivakumar
Hide whitespace changes
Inline Side-by-side

Showing with 33 additions and 138 deletions

research/object_detection/core/target_assigner.py research/object_detection/core/target_assigner.py +33 -138

No files found.
--- a/research/object_detection/core/target_assigner.py
+++ b/research/object_detection/core/target_assigner.py
@@ -42,6 +42,7 @@ import tensorflow.compat.v2 as tf2
 from object_detection.box_coders import faster_rcnn_box_coder
 from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.box_coders import detr_box_coder
 from object_detection.core import box_coder
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
@@ -57,6 +58,8 @@ from object_detection.utils import tf_version
 if tf_version.is_tf1():
  from object_detection.matchers import bipartite_matcher  # pylint: disable=g-import-not-at-top
+elif tf_version.is_tf2():
+  from object_detection.matchers import hungarian_matcher
 ResizeMethod = tf2.image.ResizeMethod
@@ -107,7 +110,8 @@ class TargetAssigner(object):
             groundtruth_boxes,
             groundtruth_labels=None,
             unmatched_class_label=None,
-             groundtruth_weights=None):
+             groundtruth_weights=None,
+             class_predictions=None):
    """Assign classification and regression targets to each anchor.
    For a given set of anchors and groundtruth detections, match anchors
@@ -160,6 +164,7 @@ class TargetAssigner(object):
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be an BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
@@ -194,10 +199,14 @@ class TargetAssigner(object):
    with tf.control_dependencies(
        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
-                                                           anchors)
+                                                           anchors,
+                                                           groundtruth_labels=groundtruth_labels,
+                                                           predicted_labels=class_predictions)
      match = self._matcher.match(match_quality_matrix,
                                  valid_rows=tf.greater(groundtruth_weights, 0))
      reg_targets = self._create_regression_targets(anchors,
                                                    groundtruth_boxes,
                                                    match)
@@ -269,6 +278,7 @@ class TargetAssigner(object):
          ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
      matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
                                   matched_keypoints)
+    a, b, c, d = matched_gt_boxlist.get_center_coordinates_and_sizes()
    matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
    match_results_shape = shape_utils.combined_static_and_dynamic_shape(
        match.match_results)
@@ -432,7 +442,12 @@ def create_target_assigner(reference, stage=None,
                                           force_match_for_each_row=False,
                                           negatives_lower_than_unmatched=False,
                                           use_matmul_gather=use_matmul_gather)
-    box_coder_instance = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+    box_coder_instance = detr_box_coder.DETRBoxCoder()
+  elif reference == 'DETR':
+    similarity_calc = sim_calc.IouAndClassSimilarity()
+    matcher = hungarian_matcher.HungarianBipartiteMatcher()
+    box_coder_instance = detr_box_coder.DETRBoxCoder()
  else:
    raise ValueError('No valid combination of reference and stage.')
@@ -446,7 +461,8 @@ def batch_assign(target_assigner,
                 gt_box_batch,
                 gt_class_targets_batch,
                 unmatched_class_label=None,
-                 gt_weights_batch=None):
+                 gt_weights_batch=None,
+                 class_predictions=None):
  """Batched assignment of classification and regression targets.
  Args:
@@ -506,11 +522,14 @@ def batch_assign(target_assigner,
  match_list = []
  if gt_weights_batch is None:
    gt_weights_batch = [None] * len(gt_class_targets_batch)
-  for anchors, gt_boxes, gt_class_targets, gt_weights in zip(
+  class_predictions = tf.unstack(class_predictions)
-      anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
+  for anchors, gt_boxes, gt_class_targets, gt_weights, class_preds in zip(
+      anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch,
+      class_predictions):
    (cls_targets, cls_weights,
     reg_targets, reg_weights, match) = target_assigner.assign(
-         anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights)
+         anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights,
+         class_preds)
    cls_targets_list.append(cls_targets)
    cls_weights_list.append(cls_weights)
    reg_targets_list.append(reg_targets)
@@ -1600,17 +1619,6 @@ class CenterNetKeypointTargetAssigner(object):
    return (batch_indices, batch_offsets, batch_weights)
-def _resize_masks(masks, height, width, method):
-  # Resize segmentation masks to conform to output dimensions. Use TF2
-  # image resize because TF1's version is buggy:
-  # https://yaqs.corp.google.com/eng/q/4970450458378240
-  masks = tf2.image.resize(
-      masks[:, :, :, tf.newaxis],
-      size=(height, width),
-      method=method)
-  return masks[:, :, :, 0]
 class CenterNetMaskTargetAssigner(object):
  """Wrapper to compute targets for segmentation masks."""
@@ -1652,9 +1660,13 @@ class CenterNetMaskTargetAssigner(object):
    segmentation_targets_list = []
    for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
-      gt_masks = _resize_masks(gt_masks, output_height, output_width,
+      # Resize segmentation masks to conform to output dimensions. Use TF2
-                               mask_resize_method)
+      # image resize because TF1's version is buggy:
-      gt_masks = gt_masks[:, :, :, tf.newaxis]
+      # https://yaqs.corp.google.com/eng/q/4970450458378240
+      gt_masks = tf2.image.resize(
+          gt_masks[:, :, :, tf.newaxis],
+          size=(output_height, output_width),
+          method=mask_resize_method)
      gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
      # Shape: [h, w, num_classes].
      segmentations_for_image = tf.reduce_max(
@@ -1778,120 +1790,3 @@ class CenterNetDensePoseTargetAssigner(object):
    batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
    batch_weights = tf.concat(batch_weights, axis=0)
    return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
-def filter_mask_overlap_min_area(masks):
-  """If a pixel belongs to 2 instances, remove it from the larger instance."""
-  num_instances = tf.shape(masks)[0]
-  def _filter_min_area():
-    """Helper function to filter non empty masks."""
-    areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
-    per_pixel_area = masks * areas
-    # Make sure background is ignored in argmin.
-    per_pixel_area = (masks * per_pixel_area +
-                      (1 - masks) * per_pixel_area.dtype.max)
-    min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)
-    filtered_masks = (
-        tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
-        ==
-        min_index[tf.newaxis, :, :]
-    )
-    return tf.cast(filtered_masks, tf.float32) * masks
-  return tf.cond(num_instances > 0, _filter_min_area,
-                 lambda: masks)
-def filter_mask_overlap(masks, method='min_area'):
-  if method == 'min_area':
-    return filter_mask_overlap_min_area(masks)
-  else:
-    raise ValueError('Unknown mask overlap filter type - {}'.format(method))
-class CenterNetCornerOffsetTargetAssigner(object):
-  """Wrapper to compute corner offsets for boxes using masks."""
-  def __init__(self, stride, overlap_resolution='min_area'):
-    """Initializes the corner offset target assigner.
-    Args:
-      stride: int, the stride of the network in output pixels.
-      overlap_resolution: string, specifies how we handle overlapping
-        instance masks. Currently only 'min_area' is supported which assigns
-        overlapping pixels to the instance with the minimum area.
-    """
-    self._stride = stride
-    self._overlap_resolution = overlap_resolution
-  def assign_corner_offset_targets(
-      self, gt_boxes_list, gt_masks_list):
-    """Computes the corner offset targets and foreground map.
-    For each pixel that is part of any object's foreground, this function
-    computes the relative offsets to the top-left and bottom-right corners of
-    that instance's bounding box. It also returns a foreground map to indicate
-    which pixels contain valid corner offsets.
-    Args:
-      gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
-        representing the groundtruth detection bounding boxes for each sample in
-        the batch. The coordinates are expected in normalized coordinates.
-      gt_masks_list: A list of float tensors with shape [num_boxes,
-        input_height, input_width] with values in {0, 1} representing instance
-        masks for each object.
-    Returns:
-      corner_offsets: A float tensor of shape [batch_size, height, width, 4]
-        containing, in order, the (y, x) offsets to the top left corner and
-        the (y, x) offsets to the bottom right corner for each foregroung pixel
-      foreground: A float tensor of shape [batch_size, height, width] in which
-        each pixel is set to 1 if it is a part of any instance's foreground
-        (and thus contains valid corner offsets) and 0 otherwise.
-    """
-    _, input_height, input_width = (
-        shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
-    output_height = input_height // self._stride
-    output_width = input_width // self._stride
-    y_grid, x_grid = tf.meshgrid(
-        tf.range(output_height), tf.range(output_width),
-        indexing='ij')
-    y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)
-    corner_targets = []
-    foreground_targets = []
-    for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
-      gt_masks = _resize_masks(gt_masks, output_height, output_width,
-                               method=ResizeMethod.NEAREST_NEIGHBOR)
-      gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)
-      ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
-      ymin, ymax = ymin * output_height, ymax * output_height
-      xmin, xmax = xmin * output_width, xmax * output_width
-      top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
-      left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
-      bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
-      right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
-      foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
-                                  tf.float32)
-      foreground_targets.append(foreground_target)
-      corner_target = tf.stack([
-          tf.reduce_sum(top_y * gt_masks, axis=0),
-          tf.reduce_sum(left_x * gt_masks, axis=0),
-          tf.reduce_sum(bottom_y * gt_masks, axis=0),
-          tf.reduce_sum(right_x * gt_masks, axis=0),
-      ], axis=2)
-      corner_targets.append(corner_target)
-    return (tf.stack(corner_targets, axis=0),
-            tf.stack(foreground_targets, axis=0))