Commit 88d844e7 authored by Vighnesh Birodkar, committed by TF Object Detection Team

Unify computation of weak losses and fix color consistency computation.

Also upgrades the keras version due to
https://github.com/tensorflow/tensorflow/issues/51592

PiperOrigin-RevId: 407556344
parent 13ec3c14
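Before this change, each weak (weakly supervised) loss had its own hard-coded zero-init, scaling, and weighting branch, and the removed color-consistency branch reused the box-consistency weight by mistake. The diff below routes both losses through a shared WEAK_LOSSES list plus one weight lookup. A minimal standalone sketch of that pattern (the constant values, Config, and raw loss numbers here are simplified stand-ins for illustration, not the library's own code):

# Sketch of the unified weak-loss weighting pattern introduced by this commit.
# The Config values and raw_losses below are made-up illustration data.
import collections

DEEP_MASK_BOX_CONSISTENCY = 'deep_mask_box_consistency'
DEEP_MASK_COLOR_CONSISTENCY = 'deep_mask_color_consistency'
WEAK_LOSSES = [DEEP_MASK_BOX_CONSISTENCY, DEEP_MASK_COLOR_CONSISTENCY]
LOSS_KEY_PREFIX = 'Loss'

Config = collections.namedtuple(
    'Config', ['box_consistency_loss_weight', 'color_consistency_loss_weight'])


def get_weak_loss_weight(loss_name, config):
  # Single lookup replaces the per-loss if-blocks in the old code.
  if loss_name == DEEP_MASK_COLOR_CONSISTENCY:
    return config.color_consistency_loss_weight
  elif loss_name == DEEP_MASK_BOX_CONSISTENCY:
    return config.box_consistency_loss_weight
  else:
    raise ValueError('Unknown loss - {}'.format(loss_name))


config = Config(box_consistency_loss_weight=1.0,
                color_consistency_loss_weight=0.5)
raw_losses = {DEEP_MASK_BOX_CONSISTENCY: 0.8,
              DEEP_MASK_COLOR_CONSISTENCY: 0.2}

losses_dict = {}
for loss_name in WEAK_LOSSES:
  weight = get_weak_loss_weight(loss_name, config)
  if weight > 0.0:  # losses with zero weight are simply not reported
    losses_dict[LOSS_KEY_PREFIX + '/' + loss_name] = (
        weight * raw_losses[loss_name])

print(losses_dict)
# {'Loss/deep_mask_box_consistency': 0.8, 'Loss/deep_mask_color_consistency': 0.1}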
"""Deep Mask heads above CenterNet (DeepMAC) architecture.
"""Deep Mask heads above CenterNet (DeepMAC)[1] architecture.
TODO(vighneshb) Add link to paper when done.
[1]: https://arxiv.org/abs/2104.00613
"""
import collections
from absl import logging
import numpy as np
import tensorflow as tf
@@ -36,6 +37,7 @@ LOSS_KEY_PREFIX = center_net_meta_arch.LOSS_KEY_PREFIX
NEIGHBORS_2D = [[-1, -1], [-1, 0], [-1, 1],
                [0, -1], [0, 1],
                [1, -1], [1, 0], [1, 1]]

+WEAK_LOSSES = [DEEP_MASK_BOX_CONSISTENCY, DEEP_MASK_COLOR_CONSISTENCY]
class DeepMACParams(
@@ -74,6 +76,15 @@ class DeepMACParams(
        color_consistency_loss_weight)

+def _get_weak_loss_weight(loss_name, config):
+  if loss_name == DEEP_MASK_COLOR_CONSISTENCY:
+    return config.color_consistency_loss_weight
+  elif loss_name == DEEP_MASK_BOX_CONSISTENCY:
+    return config.box_consistency_loss_weight
+  else:
+    raise ValueError('Unknown loss - {}'.format(loss_name))

def subsample_instances(classes, weights, boxes, masks, num_subsamples):
  """Randomly subsamples instances to the desired number.
@@ -952,6 +963,11 @@ class DeepMACMetaArch(center_net_meta_arch.CenterNetMetaArch):
      loss: A [num_instances] shaped tensor with the loss for each instance.
    """
+    if not self._deepmac_params.predict_full_resolution_masks:
+      logging.info('Color consistency is not implemented with RoIAlign '
+                   ', i.e, fixed sized masks. Returning 0 loss.')
+      return tf.zeros(tf.shape(boxes)[0])
+
    dilation = self._deepmac_params.color_consistency_dilation

    height, width = (tf.shape(preprocessed_image)[0],
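The new early return keeps the shape contract stated in the docstring above: with N boxes, the method still yields an [N]-shaped per-instance tensor, just all zeros. A tiny plain-TensorFlow sketch of that contract (not the library code itself):

import tensorflow as tf

# Toy check: with 3 dummy boxes, the zero-loss fallback is a (3,)-shaped tensor.
boxes = tf.zeros((3, 4))
zero_loss = tf.zeros(tf.shape(boxes)[0])
print(zero_loss.shape)  # (3,)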
@@ -1032,7 +1048,11 @@ class DeepMACMetaArch(center_net_meta_arch.CenterNetMetaArch):
    color_consistency_loss = self._compute_per_instance_color_consistency_loss(
        boxes, image, mask_logits)

-    return mask_prediction_loss, box_consistency_loss, color_consistency_loss
+    return {
+        DEEP_MASK_ESTIMATION: mask_prediction_loss,
+        DEEP_MASK_BOX_CONSISTENCY: box_consistency_loss,
+        DEEP_MASK_COLOR_CONSISTENCY: color_consistency_loss
+    }

  def _get_lab_image(self, preprocessed_image):
    raw_image = self._feature_extractor.preprocess_reverse(
@@ -1066,10 +1086,11 @@ class DeepMACMetaArch(center_net_meta_arch.CenterNetMetaArch):
    loss_dict = {
        DEEP_MASK_ESTIMATION: 0.0,
-        DEEP_MASK_BOX_CONSISTENCY: 0.0,
-        DEEP_MASK_COLOR_CONSISTENCY: 0.0
    }
+    for loss_name in WEAK_LOSSES:
+      loss_dict[loss_name] = 0.0

    prediction_shape = tf.shape(prediction_dict[INSTANCE_EMBEDDING][0])
    height, width = prediction_shape[1], prediction_shape[2]
@@ -1093,26 +1114,24 @@ class DeepMACMetaArch(center_net_meta_arch.CenterNetMetaArch):
      classes, valid_mask_weights, masks = filter_masked_classes(
          allowed_masked_classes_ids, classes, weights, masks)

-      (per_instance_mask_loss, per_instance_consistency_loss,
-       per_instance_color_consistency_loss) = (
-           self._compute_per_instance_deepmac_losses(
-               boxes, masks, instance_pred[i], pixel_pred[i],
-               image[i]))
-      per_instance_mask_loss *= valid_mask_weights
-      per_instance_consistency_loss *= weights
+      sample_loss_dict = self._compute_per_instance_deepmac_losses(
+          boxes, masks, instance_pred[i], pixel_pred[i], image[i])
+      sample_loss_dict[DEEP_MASK_ESTIMATION] *= valid_mask_weights
+      for loss_name in WEAK_LOSSES:
+        sample_loss_dict[loss_name] *= weights

      num_instances = tf.maximum(tf.reduce_sum(weights), 1.0)
      num_instances_allowed = tf.maximum(
          tf.reduce_sum(valid_mask_weights), 1.0)

      loss_dict[DEEP_MASK_ESTIMATION] += (
-          tf.reduce_sum(per_instance_mask_loss) / num_instances_allowed)
-      loss_dict[DEEP_MASK_BOX_CONSISTENCY] += (
-          tf.reduce_sum(per_instance_consistency_loss) / num_instances)
+          tf.reduce_sum(sample_loss_dict[DEEP_MASK_ESTIMATION]) /
+          num_instances_allowed)
-      loss_dict[DEEP_MASK_COLOR_CONSISTENCY] += (
-          tf.reduce_sum(per_instance_color_consistency_loss) / num_instances)
+      for loss_name in WEAK_LOSSES:
+        loss_dict[loss_name] += (tf.reduce_sum(sample_loss_dict[loss_name]) /
+                                 num_instances)

    batch_size = len(gt_boxes_list)
    num_predictions = len(prediction_dict[INSTANCE_EMBEDDING])
@@ -1134,17 +1153,12 @@ class DeepMACMetaArch(center_net_meta_arch.CenterNetMetaArch):
              DEEP_MASK_ESTIMATION]
      )

-    if self._deepmac_params.box_consistency_loss_weight > 0.0:
-      losses_dict[LOSS_KEY_PREFIX + '/' + DEEP_MASK_BOX_CONSISTENCY] = (
-          self._deepmac_params.box_consistency_loss_weight * mask_loss_dict[
-              DEEP_MASK_BOX_CONSISTENCY]
-      )
+    for loss_name in WEAK_LOSSES:
+      loss_weight = _get_weak_loss_weight(loss_name, self._deepmac_params)
+      if loss_weight > 0.0:
+        losses_dict[LOSS_KEY_PREFIX + '/' + loss_name] = (
+            loss_weight * mask_loss_dict[loss_name])
-    if self._deepmac_params.color_consistency_loss_weight > 0.0:
-      losses_dict[LOSS_KEY_PREFIX + '/' + DEEP_MASK_COLOR_CONSISTENCY] = (
-          self._deepmac_params.box_consistency_loss_weight * mask_loss_dict[
-              DEEP_MASK_COLOR_CONSISTENCY]
-      )

    return losses_dict

  def postprocess(self, prediction_dict, true_image_shapes, **params):
@@ -432,11 +432,12 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
    masks[1, 16:, 16:] = 1.0
    masks = tf.constant(masks)

-    loss, _, _ = model._compute_per_instance_deepmac_losses(
+    loss_dict = model._compute_per_instance_deepmac_losses(
        boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
        tf.zeros((16, 16, 3)))
    self.assertAllClose(
-        loss, np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
+        loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION],
+        np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
  def test_per_instance_loss_no_crop_resize(self):
@@ -446,11 +447,12 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
    masks = np.ones((2, 128, 128), dtype=np.float32)
    masks = tf.constant(masks)

-    loss, _, _ = model._compute_per_instance_deepmac_losses(
+    loss_dict = model._compute_per_instance_deepmac_losses(
        boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
        tf.zeros((32, 32, 3)))
    self.assertAllClose(
-        loss, np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
+        loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION],
+        np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
  def test_per_instance_loss_no_crop_resize_dice(self):
@@ -461,21 +463,23 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
    masks = np.ones((2, 128, 128), dtype=np.float32)
    masks = tf.constant(masks)

-    loss, _, _ = model._compute_per_instance_deepmac_losses(
+    loss_dict = model._compute_per_instance_deepmac_losses(
        boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
        tf.zeros((32, 32, 3)))
    pred = tf.nn.sigmoid(0.9)
    expected = (1.0 - ((2.0 * pred) / (1.0 + pred)))
-    self.assertAllClose(loss, [expected, expected], rtol=1e-3)
+    self.assertAllClose(loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION],
+                        [expected, expected], rtol=1e-3)
  def test_empty_masks(self):
    boxes = tf.zeros([0, 4])
    masks = tf.zeros([0, 128, 128])
-    loss, _, _ = self.model._compute_per_instance_deepmac_losses(
+    loss_dict = self.model._compute_per_instance_deepmac_losses(
        boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
        tf.zeros((16, 16, 3)))
-    self.assertEqual(loss.shape, (0,))
+    self.assertEqual(loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION].shape,
+                     (0,))
  def test_postprocess(self):
@@ -679,6 +683,53 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
    output = self.model._get_lab_image(tf.zeros((2, 4, 4, 3)))
    self.assertEqual(output.shape, (2, 4, 4, 3))

+  def test_loss_keys(self):
+    model = build_meta_arch(use_dice_loss=True)
+    prediction = {
+        'preprocessed_inputs': tf.random.normal((1, 32, 32, 3)),
+        'INSTANCE_EMBEDDING': [tf.random.normal((1, 8, 8, 17))] * 2,
+        'PIXEL_EMBEDDING': [tf.random.normal((1, 8, 8, 19))] * 2,
+        'object_center': [tf.random.normal((1, 8, 8, 6))] * 2,
+        'box/offset': [tf.random.normal((1, 8, 8, 2))] * 2,
+        'box/scale': [tf.random.normal((1, 8, 8, 2))] * 2
+    }
+    model.provide_groundtruth(
+        groundtruth_boxes_list=[tf.convert_to_tensor([[0., 0., 1., 1.]] * 5)],
+        groundtruth_classes_list=[tf.one_hot([1, 0, 1, 1, 1], depth=6)],
+        groundtruth_weights_list=[tf.ones(5)],
+        groundtruth_masks_list=[tf.ones((5, 32, 32))])
+    loss = model.loss(prediction, tf.constant([[32, 32, 3.0]]))
+    self.assertGreater(loss['Loss/deep_mask_estimation'], 0.0)
+    for weak_loss in deepmac_meta_arch.WEAK_LOSSES:
+      if weak_loss == deepmac_meta_arch.DEEP_MASK_COLOR_CONSISTENCY:
+        continue
+      self.assertGreater(loss['Loss/' + weak_loss], 0.0,
+                         '{} was <= 0'.format(weak_loss))
+
+  def test_loss_keys_full_res(self):
+    model = build_meta_arch(use_dice_loss=True,
+                            predict_full_resolution_masks=True)
+    prediction = {
+        'preprocessed_inputs': tf.random.normal((1, 32, 32, 3)),
+        'INSTANCE_EMBEDDING': [tf.random.normal((1, 8, 8, 17))] * 2,
+        'PIXEL_EMBEDDING': [tf.random.normal((1, 8, 8, 19))] * 2,
+        'object_center': [tf.random.normal((1, 8, 8, 6))] * 2,
+        'box/offset': [tf.random.normal((1, 8, 8, 2))] * 2,
+        'box/scale': [tf.random.normal((1, 8, 8, 2))] * 2
+    }
+    model.provide_groundtruth(
+        groundtruth_boxes_list=[tf.convert_to_tensor([[0., 0., 1., 1.]] * 5)],
+        groundtruth_classes_list=[tf.one_hot([1, 0, 1, 1, 1], depth=6)],
+        groundtruth_weights_list=[tf.ones(5)],
+        groundtruth_masks_list=[tf.ones((5, 32, 32))])
+    loss = model.loss(prediction, tf.constant([[32, 32, 3.0]]))
+    self.assertGreater(loss['Loss/deep_mask_estimation'], 0.0)
+    for weak_loss in deepmac_meta_arch.WEAK_LOSSES:
+      self.assertGreater(loss['Loss/' + weak_loss], 0.0,
+                         '{} was <= 0'.format(weak_loss))


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class FullyConnectedMaskHeadTest(tf.test.TestCase):
@@ -22,7 +22,10 @@ REQUIRED_PACKAGES = [
    'scipy',
    'pandas',
    'tf-models-official>=2.5.1',
-    'tensorflow_io'
+    'tensorflow_io',
+    # Workaround due to
+    # https://github.com/keras-team/keras/issues/15583
+    'keras==2.6.0'
]
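A quick post-install sanity check for the pin above; a minimal sketch assuming the workaround environment installs keras 2.6.x alongside a matching TensorFlow:

# Hypothetical check that the pinned keras from REQUIRED_PACKAGES is the one
# actually imported. The 2.6 prefix assertion reflects the pin above.
import keras
import tensorflow as tf

print('tensorflow', tf.__version__, '/ keras', keras.__version__)
assert keras.__version__.startswith('2.6'), (
    'Expected the pinned keras 2.6.x, got {}'.format(keras.__version__))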
setup(