Fix the score_converter issue, which should apply on the dimension of...

Fix the score_converter issue: the score converter should be applied along the num_class_slots dimension of each anchor separately, not across all anchors at a location together (e.g. softmax must normalize over the class slots of a single anchor). PiperOrigin-RevId: 344143469

Fix the score_converter issue, which should apply on the dimension of...
Fix the score_converter issue: the score converter should be applied along the num_class_slots dimension of each anchor separately, not across all anchors at a location together (e.g. softmax must normalize over the class slots of a single anchor). PiperOrigin-RevId: 344143469
1ea84b7f · A. Unique TensorFlower · TF Object Detection Team · ff47e0d6 · 1ea84b7f · 1ea84b7f
Commit 1ea84b7f authored Nov 24, 2020 by A. Unique TensorFlower Committed by TF Object Detection Team Nov 24, 2020
4 changed files
--- a/research/object_detection/predictors/heads/class_head.py
+++ b/research/object_detection/predictors/heads/class_head.py
@@ -24,6 +24,7 @@ import tensorflow.compat.v1 as tf
 import tf_slim as slim

 from object_detection.predictors.heads import head
+from object_detection.utils import shape_utils


 class MaskRCNNClassHead(head.Head):
@@ -303,13 +304,23 @@ class WeightSharedConvolutionalClassHead(head.Head):
        biases_initializer=tf.constant_initializer(
            self._class_prediction_bias_init),
        scope=self._scope)
-    batch_size = features.get_shape().as_list()[0]
-    if batch_size is None:
-      batch_size = tf.shape(features)[0]
+    batch_size, height, width = shape_utils.combined_static_and_dynamic_shape(
+        features)[0:3]
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background, [
+            batch_size, height, width, num_predictions_per_location,
+            self._num_class_slots
+        ])
    class_predictions_with_background = self._score_converter_fn(
        class_predictions_with_background)
    if self._return_flat_predictions:
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size, -1, self._num_class_slots])
+    else:
+      class_predictions_with_background = tf.reshape(
+          class_predictions_with_background, [
+              batch_size, height, width,
+              num_predictions_per_location * self._num_class_slots
+          ])
    return class_predictions_with_background
--- a/research/object_detection/predictors/heads/class_head_tf1_test.py
+++ b/research/object_detection/predictors/heads/class_head_tf1_test.py
@@ -15,6 +15,7 @@

 """Tests for object_detection.predictors.heads.class_head."""
 import unittest
+import numpy as np
 import tensorflow.compat.v1 as tf

 from google.protobuf import text_format
@@ -194,6 +195,37 @@ class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase):
      ])
      self.assertSetEqual(expected_var_names, actual_variable_set)

+  def test_softmax_score_converter(self):
+    num_class_slots = 10
+    batch_size = 2
+    height = 17
+    width = 19
+    num_predictions_per_location = 2
+    assert num_predictions_per_location != 1
+
+    def graph_fn():
+      class_prediction_head = (
+          class_head.WeightSharedConvolutionalClassHead(
+              num_class_slots=num_class_slots,
+              score_converter_fn=tf.nn.softmax))
+      image_feature = tf.random_uniform([batch_size, height, width, 1024],
+                                        minval=-10.0,
+                                        maxval=10.0,
+                                        dtype=tf.float32)
+      class_predictions = class_prediction_head.predict(
+          features=image_feature,
+          num_predictions_per_location=num_predictions_per_location)
+      return class_predictions
+
+    class_predictions_out = self.execute(graph_fn, [])
+    class_predictions_sum = np.sum(class_predictions_out, axis=-1)
+    num_anchors = height * width * num_predictions_per_location
+    exp_class_predictions_sum = np.ones((batch_size, num_anchors),
+                                        dtype=np.float32)
+    self.assertAllEqual((batch_size, num_anchors, num_class_slots),
+                        class_predictions_out.shape)
+    self.assertAllClose(class_predictions_sum, exp_class_predictions_sum)
+

 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/predictors/heads/keras_class_head.py
+++ b/research/object_detection/predictors/heads/keras_class_head.py
@@ -22,6 +22,7 @@ All the class prediction heads have a predict function that receives the
 import tensorflow.compat.v1 as tf

 from object_detection.predictors.heads import head
+from object_detection.utils import shape_utils


 class ConvolutionalClassHead(head.KerasHead):
@@ -287,6 +288,7 @@ class WeightSharedConvolutionalClassHead(head.KerasHead):

    super(WeightSharedConvolutionalClassHead, self).__init__(name=name)
    self._num_class_slots = num_class_slots
+    self._num_predictions_per_location = num_predictions_per_location
    self._kernel_size = kernel_size
    self._class_prediction_bias_init = class_prediction_bias_init
    self._use_dropout = use_dropout
@@ -339,13 +341,23 @@ class WeightSharedConvolutionalClassHead(head.KerasHead):
    for layer in self._class_predictor_layers:
      class_predictions_with_background = layer(
          class_predictions_with_background)
-    batch_size = features.get_shape().as_list()[0]
-    if batch_size is None:
-      batch_size = tf.shape(features)[0]
+    batch_size, height, width = shape_utils.combined_static_and_dynamic_shape(
+        features)[0:3]
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background, [
+            batch_size, height, width, self._num_predictions_per_location,
+            self._num_class_slots
+        ])
    class_predictions_with_background = self._score_converter_fn(
        class_predictions_with_background)
    if self._return_flat_predictions:
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size, -1, self._num_class_slots])
+    else:
+      class_predictions_with_background = tf.reshape(
+          class_predictions_with_background, [
+              batch_size, height, width,
+              self._num_predictions_per_location * self._num_class_slots
+          ])
    return class_predictions_with_background
--- a/research/object_detection/predictors/heads/keras_class_head_tf2_test.py
+++ b/research/object_detection/predictors/heads/keras_class_head_tf2_test.py
@@ -15,6 +15,7 @@

 """Tests for object_detection.predictors.heads.class_head."""
 import unittest
+import numpy as np
 import tensorflow.compat.v1 as tf

 from google.protobuf import text_format
@@ -198,6 +199,38 @@ class WeightSharedConvolutionalKerasClassPredictorTest(test_case.TestCase):
    class_prediction_head(image_feature)
    self.assertEqual(len(class_prediction_head.variables), 2)

+  def test_softmax_score_converter(self):
+    num_class_slots = 10
+    batch_size = 2
+    height = 17
+    width = 19
+    num_predictions_per_location = 2
+    assert num_predictions_per_location != 1
+
+    conv_hyperparams = self._build_conv_hyperparams()
+    class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead(
+        num_class_slots=num_class_slots,
+        conv_hyperparams=conv_hyperparams,
+        num_predictions_per_location=num_predictions_per_location,
+        score_converter_fn=tf.nn.softmax)
+
+    def graph_fn():
+      image_feature = tf.random_uniform([batch_size, height, width, 1024],
+                                        minval=-10.0,
+                                        maxval=10.0,
+                                        dtype=tf.float32)
+      class_predictions = class_prediction_head(image_feature)
+      return class_predictions
+
+    class_predictions_out = self.execute(graph_fn, [])
+    class_predictions_sum = np.sum(class_predictions_out, axis=-1)
+    num_anchors = height * width * num_predictions_per_location
+    exp_class_predictions_sum = np.ones((batch_size, num_anchors),
+                                        dtype=np.float32)
+    self.assertAllEqual((batch_size, num_anchors, num_class_slots),
+                        class_predictions_out.shape)
+    self.assertAllClose(class_predictions_sum, exp_class_predictions_sum)
+

 if __name__ == '__main__':
  tf.test.main()