Commit 64f6527c authored by A. Unique TensorFlower, committed by TF Object Detection Team

Enable deterministic center cropping/padding in object detection preprocessing.

PiperOrigin-RevId: 371929331
parent 8f58f396
@@ -1776,6 +1776,7 @@ def random_pad_image(image,
                      min_image_size=None,
                      max_image_size=None,
                      pad_color=None,
+                     center_pad=False,
                      seed=None,
                      preprocess_vars_cache=None):
   """Randomly pads the image.
@@ -1814,6 +1815,8 @@ def random_pad_image(image,
     pad_color: padding color. A rank 1 tensor of [channels] with dtype=
       tf.float32. If set as None, it will be set to the average color of
       the input image.
+    center_pad: whether to pad the original image to the center of the target
+      canvas, instead of at a random offset (the default).
     seed: random seed.
     preprocess_vars_cache: PreprocessorCache object that records previously
                            performed augmentations. Updated in-place. If this
@@ -1870,6 +1873,12 @@ def random_pad_image(image,
       lambda: _random_integer(0, target_width - image_width, seed),
       lambda: tf.constant(0, dtype=tf.int32))
 
+  if center_pad:
+    offset_height = tf.cast(tf.floor((target_height - image_height) / 2),
+                            tf.int32)
+    offset_width = tf.cast(tf.floor((target_width - image_width) / 2),
+                           tf.int32)
+
   gen_func = lambda: (target_height, target_width, offset_height, offset_width)
   params = _get_or_create_preprocess_rand_vars(
       gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
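
For illustration only (not part of the diff), here is a minimal sketch of the offsets that center_pad=True produces; the 200x400 input and 400x400 target sizes are assumed values chosen to mirror the test added further down.

import tensorflow as tf

image_height, image_width = 200, 400    # assumed input size
target_height, target_width = 400, 400  # assumed padded size

# With center_pad=True the offsets are no longer sampled: the original image
# is placed in the middle of the padded canvas, so repeated runs agree.
offset_height = tf.cast(tf.floor((target_height - image_height) / 2), tf.int32)
offset_width = tf.cast(tf.floor((target_width - image_width) / 2), tf.int32)
print(offset_height.numpy(), offset_width.numpy())  # 100 0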
@@ -2113,7 +2122,7 @@ def random_crop_pad_image(image,
       max_padded_size_ratio,
       dtype=tf.int32)
 
-  padded_image, padded_boxes = random_pad_image(
+  padded_image, padded_boxes = random_pad_image(  # pylint: disable=unbalanced-tuple-unpacking
       cropped_image,
       cropped_boxes,
       min_image_size=min_image_size,
@@ -2153,6 +2162,7 @@ def random_crop_to_aspect_ratio(image,
                                 aspect_ratio=1.0,
                                 overlap_thresh=0.3,
                                 clip_boxes=True,
+                                center_crop=False,
                                 seed=None,
                                 preprocess_vars_cache=None):
   """Randomly crops an image to the specified aspect ratio.
@@ -2191,6 +2201,7 @@ def random_crop_to_aspect_ratio(image,
     overlap_thresh: minimum overlap thresh with new cropped
                     image to keep the box.
     clip_boxes: whether to clip the boxes to the cropped image.
+    center_crop: whether to take the center crop or a random crop.
     seed: random seed.
     preprocess_vars_cache: PreprocessorCache object that records previously
                            performed augmentations. Updated in-place. If this
@@ -2247,8 +2258,14 @@ def random_crop_to_aspect_ratio(image,
     # either offset_height = 0 and offset_width is randomly chosen from
     # [0, offset_width - target_width), or else offset_width = 0 and
     # offset_height is randomly chosen from [0, offset_height - target_height)
-    offset_height = _random_integer(0, orig_height - target_height + 1, seed)
-    offset_width = _random_integer(0, orig_width - target_width + 1, seed)
+    if center_crop:
+      offset_height = tf.cast(tf.math.floor((orig_height - target_height) / 2),
+                              tf.int32)
+      offset_width = tf.cast(tf.math.floor((orig_width - target_width) / 2),
+                             tf.int32)
+    else:
+      offset_height = _random_integer(0, orig_height - target_height + 1, seed)
+      offset_width = _random_integer(0, orig_width - target_width + 1, seed)
 
     generator_func = lambda: (offset_height, offset_width)
     offset_height, offset_width = _get_or_create_preprocess_rand_vars(
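
Likewise, a small sketch (not from the commit) of the deterministic offsets that center_crop=True yields, assuming a 200x400 image cropped to aspect_ratio=1.0, i.e. a 200x200 window:

import tensorflow as tf

orig_height, orig_width = 200, 400      # assumed input size
target_height, target_width = 200, 200  # assumed crop for aspect_ratio=1.0

# center_crop=True takes the middle of the valid offset range instead of a
# random draw, so the same region is cropped on every run.
offset_height = tf.cast(tf.math.floor((orig_height - target_height) / 2),
                        tf.int32)
offset_width = tf.cast(tf.math.floor((orig_width - target_width) / 2),
                       tf.int32)
print(offset_height.numpy(), offset_width.numpy())  # 0 100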
@@ -2979,7 +2996,7 @@ def resize_to_range(image,
                            'per-channel pad value.')
         new_image = tf.stack(
             [
-                tf.pad(
+                tf.pad(  # pylint: disable=g-complex-comprehension
                     channels[i], [[0, max_dimension - new_size[0]],
                                   [0, max_dimension - new_size[1]]],
                     constant_values=per_channel_pad_value[i])
@@ -2194,6 +2194,54 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
                         expected_boxes.flatten())
     self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
 
+  def testRunRandomCropToAspectRatioCenterCrop(self):
+    def graph_fn():
+      image = self.createColorfulTestImage()
+      boxes = self.createTestBoxes()
+      labels = self.createTestLabels()
+      weights = self.createTestGroundtruthWeights()
+      masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+      tensor_dict = {
+          fields.InputDataFields.image: image,
+          fields.InputDataFields.groundtruth_boxes: boxes,
+          fields.InputDataFields.groundtruth_classes: labels,
+          fields.InputDataFields.groundtruth_weights: weights,
+          fields.InputDataFields.groundtruth_instance_masks: masks
+      }
+      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+          include_instance_masks=True)
+      preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
+          'center_crop': True
+      })]
+      with mock.patch.object(preprocessor,
+                             '_random_integer') as mock_random_integer:
+        mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+        distorted_tensor_dict = preprocessor.preprocess(
+            tensor_dict,
+            preprocessing_options,
+            func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      return [
+          distorted_image, distorted_boxes, distorted_labels
+      ]
+
+    (distorted_image_, distorted_boxes_, distorted_labels_) = self.execute_cpu(
+        graph_fn, [])
+    expected_boxes = np.array([[0.0, 0.0, 0.75, 1.0],
+                               [0.25, 0.5, 0.75, 1.0]], dtype=np.float32)
+    self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+    self.assertAllEqual(distorted_labels_, [1, 2])
+    self.assertAllClose(distorted_boxes_.flatten(),
+                        expected_boxes.flatten())
+
   def testRunRandomCropToAspectRatioWithKeypoints(self):
     def graph_fn():
       image = self.createColorfulTestImage()
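
As a by-hand check (again not part of the commit) of the expected_boxes in testRunRandomCropToAspectRatioCenterCrop above, assuming createColorfulTestImage() yields a 200x400 image and createTestBoxes() returns [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]] in normalized [ymin, xmin, ymax, xmax] form:

import numpy as np

orig_h, orig_w = 200, 400                   # assumed test image size
boxes = np.array([[0.0, 0.25, 0.75, 1.0],
                  [0.25, 0.5, 0.75, 1.0]])  # assumed createTestBoxes() values

# Deterministic center crop of the 200x400 image to a 200x200 window.
crop_h, crop_w, off_h, off_w = 200, 200, 0, 100

# Re-normalize each corner against the crop window and clip to [0, 1].
cropped = np.stack([(boxes[:, 0] * orig_h - off_h) / crop_h,
                    (boxes[:, 1] * orig_w - off_w) / crop_w,
                    (boxes[:, 2] * orig_h - off_h) / crop_h,
                    (boxes[:, 3] * orig_w - off_w) / crop_w], axis=1)
print(np.clip(cropped, 0.0, 1.0))
# [[0.   0.   0.75 1.  ]
#  [0.25 0.5  0.75 1.  ]]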
@@ -2433,6 +2481,51 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
         padded_boxes_[:, 3] - padded_boxes_[:, 1])))
 
+  def testRandomPadImageCenterPad(self):
+    def graph_fn():
+      preprocessing_options = [(preprocessor.normalize_image, {
+          'original_minval': 0,
+          'original_maxval': 255,
+          'target_minval': 0,
+          'target_maxval': 1
+      })]
+      images = self.createColorfulTestImage()
+      boxes = self.createTestBoxes()
+      labels = self.createTestLabels()
+      tensor_dict = {
+          fields.InputDataFields.image: images,
+          fields.InputDataFields.groundtruth_boxes: boxes,
+          fields.InputDataFields.groundtruth_classes: labels,
+      }
+      tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+      images = tensor_dict[fields.InputDataFields.image]
+
+      preprocessing_options = [(preprocessor.random_pad_image, {
+          'center_pad': True,
+          'min_image_size': [400, 400],
+          'max_image_size': [400, 400],
+      })]
+      padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                   preprocessing_options)
+      padded_images = padded_tensor_dict[fields.InputDataFields.image]
+      padded_boxes = padded_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      padded_labels = padded_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      return [padded_images, padded_boxes, padded_labels]
+
+    (padded_images_, padded_boxes_, padded_labels_) = self.execute_cpu(
+        graph_fn, [])
+    expected_boxes = np.array([[0.25, 0.25, 0.625, 1.0],
+                               [0.375, 0.5, .625, 1.0]], dtype=np.float32)
+    self.assertAllEqual(padded_images_.shape, [1, 400, 400, 3])
+    self.assertAllEqual(padded_labels_, [1, 2])
+    self.assertAllClose(padded_boxes_.flatten(),
+                        expected_boxes.flatten())
+
   @parameterized.parameters(
       {'include_dense_pose': False},
   )
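
The expected_boxes in testRandomPadImageCenterPad follow the same arithmetic in reverse; a sketch under the same assumed fixtures, with the 200x400 image centered on a 400x400 canvas (offset_height=100, offset_width=0):

import numpy as np

orig_h, orig_w = 200, 400                   # assumed test image size
boxes = np.array([[0.0, 0.25, 0.75, 1.0],
                  [0.25, 0.5, 0.75, 1.0]])  # assumed createTestBoxes() values

# Deterministic center pad of the 200x400 image onto a 400x400 canvas.
pad_h, pad_w, off_h, off_w = 400, 400, 100, 0

# Shift each corner by the pad offset and re-normalize against the canvas.
padded = np.stack([(boxes[:, 0] * orig_h + off_h) / pad_h,
                   (boxes[:, 1] * orig_w + off_w) / pad_w,
                   (boxes[:, 2] * orig_h + off_h) / pad_h,
                   (boxes[:, 3] * orig_w + off_w) / pad_w], axis=1)
print(padded)
# [[0.25  0.25  0.625 1.   ]
#  [0.375 0.5   0.625 1.   ]]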