Internal change

PiperOrigin-RevId: 405975992

Internal change
PiperOrigin-RevId: 405975992
9e9e21f3 · Xianzhi Du · A. Unique TensorFlower · 33f054a2 · 9e9e21f3 · 9e9e21f3
Commit 9e9e21f3 authored Oct 27, 2021 by Xianzhi Du Committed by A. Unique TensorFlower Oct 27, 2021
6 changed files
--- a/official/vision/beta/configs/maskrcnn.py
+++ b/official/vision/beta/configs/maskrcnn.py
@@ -133,6 +133,7 @@ class DetectionGenerator(hyperparams.Config):
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`
  use_cpu_nms: bool = False
+  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


 @dataclasses.dataclass

--- a/official/vision/beta/configs/retinanet.py
+++ b/official/vision/beta/configs/retinanet.py
@@ -114,6 +114,7 @@ class DetectionGenerator(hyperparams.Config):
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`.
  use_cpu_nms: bool = False
+  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


 @dataclasses.dataclass

--- a/official/vision/beta/modeling/factory.py
+++ b/official/vision/beta/modeling/factory.py
@@ -198,7 +198,8 @@ def build_maskrcnn(
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
-      use_cpu_nms=generator_config.use_cpu_nms)
+      use_cpu_nms=generator_config.use_cpu_nms,
+      soft_nms_sigma=generator_config.soft_nms_sigma)

  if model_config.include_mask:
    mask_head = instance_heads.MaskHead(
@@ -301,7 +302,8 @@ def build_retinanet(
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
-      use_cpu_nms=generator_config.use_cpu_nms)
+      use_cpu_nms=generator_config.use_cpu_nms,
+      soft_nms_sigma=generator_config.soft_nms_sigma)

  model = retinanet_model.RetinaNetModel(
      backbone,

--- a/official/vision/beta/modeling/layers/detection_generator.py
+++ b/official/vision/beta/modeling/layers/detection_generator.py
@@ -20,6 +20,7 @@ import tensorflow as tf

 from official.vision.beta.ops import box_ops
 from official.vision.beta.ops import nms
+from official.vision.beta.ops import preprocess_ops


 def _generate_detections_v1(boxes: tf.Tensor,
@@ -29,7 +30,8 @@ def _generate_detections_v1(boxes: tf.Tensor,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
-                            max_num_detections: int = 100):
+                            max_num_detections: int = 100,
+                            soft_nms_sigma: Optional[float] = None):
  """Generates the final detections given the model outputs.

  The implementation unrolls the batch dimension and process images one by one.
@@ -58,6 +60,8 @@ def _generate_detections_v1(boxes: tf.Tensor,
      boxes overlap too much with respect to IOU.
    max_num_detections: A scalar representing maximum number of boxes retained
      over all classes.
+    soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
+      When soft_nms_sigma=0.0 (which is default), we fall back to standard NMS.

  Returns:
    nms_boxes: A `float` type `tf.Tensor` of shape
@@ -99,7 +103,8 @@ def _generate_detections_v1(boxes: tf.Tensor,
           pre_nms_top_k=pre_nms_top_k,
           pre_nms_score_threshold=pre_nms_score_threshold,
           nms_iou_threshold=nms_iou_threshold,
-           max_num_detections=max_num_detections)
+           max_num_detections=max_num_detections,
+           soft_nms_sigma=soft_nms_sigma)
      nmsed_boxes.append(nmsed_boxes_i)
      nmsed_scores.append(nmsed_scores_i)
      nmsed_classes.append(nmsed_classes_i)
@@ -126,7 +131,8 @@ def _generate_detections_per_image(
    pre_nms_top_k: int = 5000,
    pre_nms_score_threshold: float = 0.05,
    nms_iou_threshold: float = 0.5,
-    max_num_detections: int = 100):
+    max_num_detections: int = 100,
+    soft_nms_sigma: Optional[float] = None):
  """Generates the final detections per image given the model outputs.

  Args:
@@ -149,6 +155,9 @@ def _generate_detections_per_image(
      boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes retained
      over all classes.
+    soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
+      When soft_nms_sigma=0.0, we fall back to standard NMS.
+      If set to None, `tf.image.non_max_suppression_padded` is called instead.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape `[max_num_detections, 4]`
@@ -182,21 +191,38 @@ def _generate_detections_per_image(
        scores_i, k=tf.minimum(tf.shape(scores_i)[-1], pre_nms_top_k))
    boxes_i = tf.gather(boxes_i, indices)

-    (nmsed_indices_i,
-     nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
-         tf.cast(boxes_i, tf.float32),
-         tf.cast(scores_i, tf.float32),
-         max_num_detections,
-         iou_threshold=nms_iou_threshold,
-         score_threshold=pre_nms_score_threshold,
-         pad_to_max_output_size=True,
-         name='nms_detections_' + str(i))
-    nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
-    nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
-    # Sets scores of invalid boxes to -1.
-    nmsed_scores_i = tf.where(
-        tf.less(tf.range(max_num_detections), [nmsed_num_valid_i]),
-        nmsed_scores_i, -tf.ones_like(nmsed_scores_i))
+    if soft_nms_sigma is not None:
+      (nmsed_indices_i,
+       nmsed_scores_i) = tf.image.non_max_suppression_with_scores(
+           tf.cast(boxes_i, tf.float32),
+           tf.cast(scores_i, tf.float32),
+           max_num_detections,
+           iou_threshold=nms_iou_threshold,
+           score_threshold=pre_nms_score_threshold,
+           soft_nms_sigma=soft_nms_sigma,
+           name='nms_detections_' + str(i))
+      nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
+      nmsed_boxes_i = preprocess_ops.clip_or_pad_to_fixed_size(
+          nmsed_boxes_i, max_num_detections, 0.0)
+      nmsed_scores_i = preprocess_ops.clip_or_pad_to_fixed_size(
+          nmsed_scores_i, max_num_detections, -1.0)
+    else:
+      (nmsed_indices_i,
+       nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
+           tf.cast(boxes_i, tf.float32),
+           tf.cast(scores_i, tf.float32),
+           max_num_detections,
+           iou_threshold=nms_iou_threshold,
+           score_threshold=pre_nms_score_threshold,
+           pad_to_max_output_size=True,
+           name='nms_detections_' + str(i))
+      nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
+      nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
+      # Sets scores of invalid boxes to -1.
+      nmsed_scores_i = tf.where(
+          tf.less(tf.range(max_num_detections), [nmsed_num_valid_i]),
+          nmsed_scores_i, -tf.ones_like(nmsed_scores_i))
+
    nmsed_classes_i = tf.fill([max_num_detections], i)
    nmsed_boxes.append(nmsed_boxes_i)
    nmsed_scores.append(nmsed_scores_i)
@@ -207,6 +233,8 @@ def _generate_detections_per_image(
        att_i = att[:, min(num_classes_for_attr - 1, i)]
        att_i = tf.gather(att_i, indices)
        nmsed_att_i = tf.gather(att_i, nmsed_indices_i)
+        nmsed_att_i = preprocess_ops.clip_or_pad_to_fixed_size(
+            nmsed_att_i, max_num_detections, 0.0)
        nmsed_attributes[att_name].append(nmsed_att_i)

  # Concats results from all classes and sort them.
@@ -407,6 +435,7 @@ class DetectionGenerator(tf.keras.layers.Layer):
               max_num_detections: int = 100,
               nms_version: str = 'v2',
               use_cpu_nms: bool = False,
+               soft_nms_sigma: Optional[float] = None,
               **kwargs):
    """Initializes a detection generator.

@@ -423,6 +452,8 @@ class DetectionGenerator(tf.keras.layers.Layer):
        generate.
      nms_version: A string of `batched`, `v1` or `v2` specifies NMS version.
      use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
+      soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
+        When soft_nms_sigma=0.0, we fall back to standard NMS.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
@@ -433,6 +464,7 @@ class DetectionGenerator(tf.keras.layers.Layer):
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
+        'soft_nms_sigma': soft_nms_sigma,
    }
    super(DetectionGenerator, self).__init__(**kwargs)

@@ -540,7 +572,8 @@ class DetectionGenerator(tf.keras.layers.Layer):
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
-                max_num_detections=self._config_dict['max_num_detections']))
+                max_num_detections=self._config_dict['max_num_detections'],
+                soft_nms_sigma=self._config_dict['soft_nms_sigma']))
      elif self._config_dict['nms_version'] == 'v2':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_v2(
@@ -585,6 +618,7 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
               max_num_detections: int = 100,
               nms_version: str = 'v1',
               use_cpu_nms: bool = False,
+               soft_nms_sigma: Optional[float] = None,
               **kwargs):
    """Initializes a multi-level detection generator.

@@ -601,6 +635,8 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
        generate.
      nms_version: A string of `batched`, `v1` or `v2` specifies NMS version
      use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
+      soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
+        When soft_nms_sigma=0.0, we fall back to standard NMS.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
@@ -611,6 +647,7 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
+        'soft_nms_sigma': soft_nms_sigma,
    }
    super(MultilevelDetectionGenerator, self).__init__(**kwargs)

@@ -778,7 +815,8 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
                 pre_nms_score_threshold=self
                 ._config_dict['pre_nms_score_threshold'],
                 nms_iou_threshold=self._config_dict['nms_iou_threshold'],
-                 max_num_detections=self._config_dict['max_num_detections']))
+                 max_num_detections=self._config_dict['max_num_detections'],
+                 soft_nms_sigma=self._config_dict['soft_nms_sigma']))
      elif self._config_dict['nms_version'] == 'v2':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_v2(

--- a/official/vision/beta/modeling/layers/detection_generator_test.py
+++ b/official/vision/beta/modeling/layers/detection_generator_test.py
@@ -44,9 +44,11 @@ class DetectionGeneratorTest(
    parameterized.TestCase, tf.test.TestCase):

  @parameterized.product(
-      nms_version=['batched', 'v1', 'v2'], use_cpu_nms=[True, False])
-  def testDetectionsOutputShape(self, nms_version, use_cpu_nms):
-    max_num_detections = 100
+      nms_version=['batched', 'v1', 'v2'],
+      use_cpu_nms=[True, False],
+      soft_nms_sigma=[None, 0.1])
+  def testDetectionsOutputShape(self, nms_version, use_cpu_nms, soft_nms_sigma):
+    max_num_detections = 10
    num_classes = 4
    pre_nms_top_k = 5000
    pre_nms_score_threshold = 0.01
@@ -59,6 +61,7 @@ class DetectionGeneratorTest(
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
+        'soft_nms_sigma': soft_nms_sigma,
    }
    generator = detection_generator.DetectionGenerator(**kwargs)

@@ -99,6 +102,7 @@ class DetectionGeneratorTest(
        'max_num_detections': 10,
        'nms_version': 'v2',
        'use_cpu_nms': False,
+        'soft_nms_sigma': None,
    }
    generator = detection_generator.DetectionGenerator(**kwargs)

@@ -116,18 +120,20 @@ class MultilevelDetectionGeneratorTest(
    parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
-      ('batched', False, True),
-      ('batched', False, False),
-      ('v2', False, True),
-      ('v2', False, False),
-      ('v1', True, True),
-      ('v1', True, False),
+      ('batched', False, True, None),
+      ('batched', False, False, None),
+      ('v2', False, True, None),
+      ('v2', False, False, None),
+      ('v1', True, True, 0.0),
+      ('v1', True, False, 0.1),
+      ('v1', True, False, None),
  )
-  def testDetectionsOutputShape(self, nms_version, has_att_heads, use_cpu_nms):
+  def testDetectionsOutputShape(self, nms_version, has_att_heads, use_cpu_nms,
+                                soft_nms_sigma):
    min_level = 4
    max_level = 6
    num_scales = 2
-    max_num_detections = 100
+    max_num_detections = 10
    aspect_ratios = [1.0, 2.0]
    anchor_scale = 2.0
    output_size = [64, 64]
@@ -143,6 +149,7 @@ class MultilevelDetectionGeneratorTest(
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
+        'soft_nms_sigma': soft_nms_sigma,
    }

    input_anchor = anchor.build_anchor_generator(min_level, max_level,
@@ -224,6 +231,7 @@ class MultilevelDetectionGeneratorTest(
        'max_num_detections': 10,
        'nms_version': 'v2',
        'use_cpu_nms': False,
+        'soft_nms_sigma': None,
    }
    generator = detection_generator.MultilevelDetectionGenerator(**kwargs)


--- a/official/vision/beta/modeling/retinanet_model_test.py
+++ b/official/vision/beta/modeling/retinanet_model_test.py
@@ -148,9 +148,10 @@ class RetinaNetTest(parameterized.TestCase, tf.test.TestCase):
          training=[True, False],
          has_att_heads=[True, False],
          output_intermediate_features=[True, False],
+          soft_nms_sigma=[None, 0.0, 0.1],
      ))
  def test_forward(self, strategy, image_size, training, has_att_heads,
-                   output_intermediate_features):
+                   output_intermediate_features, soft_nms_sigma):
    """Test for creation of a R50-FPN RetinaNet."""
    tf.keras.backend.set_image_data_format('channels_last')
    num_classes = 3
@@ -193,7 +194,10 @@ class RetinaNetTest(parameterized.TestCase, tf.test.TestCase):
          attribute_heads=attribute_heads,
          num_anchors_per_location=num_anchors_per_location)
      generator = detection_generator.MultilevelDetectionGenerator(
-          max_num_detections=10, nms_version='v1')
+          max_num_detections=10,
+          nms_version='v1',
+          use_cpu_nms=soft_nms_sigma is not None,
+          soft_nms_sigma=soft_nms_sigma)
      model = retinanet_model.RetinaNetModel(
          backbone=backbone,
          decoder=decoder,