Make resize_to_range preserve static spatial shape when available.

4e26fbfa · Derek Chow · ec91f6b8 · 4e26fbfa · 4e26fbfa
Commit 4e26fbfa authored Jul 17, 2017 by Derek Chow
Showing with 117 additions and 97 deletions

object_detection/core/preprocessor.py object_detection/core/preprocessor.py +86 -52

object_detection/core/preprocessor_test.py object_detection/core/preprocessor_test.py +31 -45

No files found.
--- a/object_detection/core/preprocessor.py
+++ b/object_detection/core/preprocessor.py
@@ -1255,6 +1255,82 @@ def random_resize_method(image, target_size):
  return resized_image
+def _compute_new_static_size(image,
+                             min_dimension,
+                             max_dimension):
+  """Compute new static shape for resize_to_range method.
+
+  All arithmetic is done in Python on the static shape of `image`; only the
+  final result is wrapped in a constant tensor.
+
+  Args:
+    image: Tensor whose static shape is read via `get_shape()`. Dimension 0 is
+      used as the height and dimension 1 as the width, so both must be known.
+    min_dimension: Desired size of the smaller of height and width.
+    max_dimension: Upper bound on the larger of height and width. If falsy
+      (e.g. None), no upper bound is applied.
+
+  Returns:
+    A `tf.constant` holding `[new_height, new_width]`.
+  """
+  image_shape = image.get_shape().as_list()
+  orig_height = image_shape[0]
+  orig_width = image_shape[1]
+  orig_min_dim = min(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  large_scale_factor = min_dimension / float(orig_min_dim)
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = int(round(orig_height * large_scale_factor))
+  large_width = int(round(orig_width * large_scale_factor))
+  large_size = [large_height, large_width]
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = max(orig_height, orig_width)
+    small_scale_factor = max_dimension / float(orig_max_dim)
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = int(round(orig_height * small_scale_factor))
+    small_width = int(round(orig_width * small_scale_factor))
+    small_size = [small_height, small_width]
+    # Prefer the larger size; fall back to the smaller one only when the
+    # larger size's longest side would exceed max_dimension.
+    new_size = large_size
+    if max(large_size) > max_dimension:
+      new_size = small_size
+  else:
+    # No upper bound was requested, so the min_dimension-based size is used.
+    new_size = large_size
+  return tf.constant(new_size)
+def _compute_new_dynamic_size(image,
+                              min_dimension,
+                              max_dimension):
+  """Compute new dynamic shape for resize_to_range method.
+
+  Builds graph ops that derive the new size from `tf.shape(image)`, so the
+  result is only known when the graph is run.
+
+  Args:
+    image: Tensor whose runtime shape is read via `tf.shape`. Dimension 0 is
+      used as the height and dimension 1 as the width.
+    min_dimension: Desired size of the smaller of height and width; converted
+      to a float32 constant.
+    max_dimension: Upper bound on the larger of height and width. If falsy
+      (e.g. None), no upper bound is applied.
+
+  Returns:
+    An int32 tensor holding `[new_height, new_width]`.
+  """
+  image_shape = tf.shape(image)
+  orig_height = tf.to_float(image_shape[0])
+  orig_width = tf.to_float(image_shape[1])
+  orig_min_dim = tf.minimum(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  # The argument is rebound to a float32 constant so it can be divided by the
+  # float32 image dimensions.
+  min_dimension = tf.constant(min_dimension, dtype=tf.float32)
+  large_scale_factor = min_dimension / orig_min_dim
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
+  large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
+  large_size = tf.stack([large_height, large_width])
+  # NOTE(review): at this point max_dimension is still the caller's value
+  # (presumably a Python number or None); it is only rebound to a tensor
+  # inside the branch, so this check happens at graph-construction time.
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = tf.maximum(orig_height, orig_width)
+    max_dimension = tf.constant(max_dimension, dtype=tf.float32)
+    small_scale_factor = max_dimension / orig_max_dim
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
+    small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
+    small_size = tf.stack([small_height, small_width])
+    # Chosen when the graph runs: use the smaller size only if the larger
+    # size's longest side exceeds max_dimension.
+    new_size = tf.cond(
+        tf.to_float(tf.reduce_max(large_size)) > max_dimension,
+        lambda: small_size, lambda: large_size)
+  else:
+    # No upper bound was requested, so the min_dimension-based size is used.
+    new_size = large_size
+  return new_size
 def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
@@ -1295,64 +1371,22 @@ def resize_to_range(image,
    raise ValueError('Image should be 3D tensor')
  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
-    image_shape = tf.shape(image)
+    if image.get_shape().is_fully_defined():
-    orig_height = tf.to_float(image_shape[0])
+      new_size = _compute_new_static_size(image, min_dimension,
-    orig_width = tf.to_float(image_shape[1])
+                                          max_dimension)
-    orig_min_dim = tf.minimum(orig_height, orig_width)
-    # Calculates the larger of the possible sizes
-    min_dimension = tf.constant(min_dimension, dtype=tf.float32)
-    large_scale_factor = min_dimension / orig_min_dim
-    # Scaling orig_(height|width) by large_scale_factor will make the smaller
-    # dimension equal to min_dimension, save for floating point rounding errors.
-    # For reasonably-sized images, taking the nearest integer will reliably
-    # eliminate this error.
-    large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
-    large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
-    large_size = tf.stack([large_height, large_width])
-    if max_dimension:
-      # Calculates the smaller of the possible sizes, use that if the larger
-      # is too big.
-      orig_max_dim = tf.maximum(orig_height, orig_width)
-      max_dimension = tf.constant(max_dimension, dtype=tf.float32)
-      small_scale_factor = max_dimension / orig_max_dim
-      # Scaling orig_(height|width) by small_scale_factor will make the larger
-      # dimension equal to max_dimension, save for floating point rounding
-      # errors. For reasonably-sized images, taking the nearest integer will
-      # reliably eliminate this error.
-      small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
-      small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
-      small_size = tf.stack([small_height, small_width])
-      new_size = tf.cond(
-          tf.to_float(tf.reduce_max(large_size)) > max_dimension,
-          lambda: small_size, lambda: large_size)
    else:
-      new_size = large_size
+      new_size = _compute_new_dynamic_size(image, min_dimension,
+                                           max_dimension)
    new_image = tf.image.resize_images(image, new_size,
                                       align_corners=align_corners)
    result = new_image
    if masks is not None:
-      num_instances = tf.shape(masks)[0]
+      new_masks = tf.expand_dims(masks, 3)
+      new_masks = tf.image.resize_nearest_neighbor(new_masks, new_size,
-      def resize_masks_branch():
+                                                   align_corners=align_corners)
-        new_masks = tf.expand_dims(masks, 3)
+      new_masks = tf.squeeze(new_masks, 3)
-        new_masks = tf.image.resize_nearest_neighbor(
+      result = [new_image, new_masks]
-            new_masks, new_size, align_corners=align_corners)
-        new_masks = tf.squeeze(new_masks, axis=3)
-        return new_masks
-      def reshape_masks_branch():
-        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
-        return new_masks
-      masks = tf.cond(num_instances > 0,
-                      resize_masks_branch,
-                      reshape_masks_branch)
-      result = [new_image, masks]
    return result

--- a/object_detection/core/preprocessor_test.py
+++ b/object_detection/core/preprocessor_test.py
@@ -1395,7 +1395,7 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllEqual(expected_images_shape_,
                          resized_images_shape_)
-  def testResizeToRange(self):
+  def testResizeToRangePreservesStaticSpatialShape(self):
    """Tests image resizing, checking output sizes."""
    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
    min_dim = 50
@@ -1406,13 +1406,27 @@ class PreprocessorTest(tf.test.TestCase):
      in_image = tf.random_uniform(in_shape)
      out_image = preprocessor.resize_to_range(
          in_image, min_dimension=min_dim, max_dimension=max_dim)
-      out_image_shape = tf.shape(out_image)
+      self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
+  def testResizeToRangeWithDynamicSpatialShape(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      out_image = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
      with self.test_session() as sess:
-        out_image_shape = sess.run(out_image_shape)
+        out_image_shape = sess.run(out_image_shape,
+                                   feed_dict={in_image:
+                                              np.random.randn(*in_shape)})
        self.assertAllEqual(out_image_shape, expected_shape)
-  def testResizeToRangeWithMasks(self):
+  def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
    """Tests image resizing, checking output sizes."""
    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
@@ -1430,30 +1444,25 @@ class PreprocessorTest(tf.test.TestCase):
      in_masks = tf.random_uniform(in_masks_shape)
      out_image, out_masks = preprocessor.resize_to_range(
          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
-      out_image_shape = tf.shape(out_image)
+      self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
-      out_masks_shape = tf.shape(out_masks)
+      self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
-      with self.test_session() as sess:
-        out_image_shape, out_masks_shape = sess.run(
-            [out_image_shape, out_masks_shape])
-        self.assertAllEqual(out_image_shape, expected_image_shape)
-        self.assertAllEqual(out_masks_shape, expected_mask_shape)
-  def testResizeToRangeWithNoInstanceMask(self):
+  def testResizeToRangeWithMasksAndDynamicSpatialShape(self):
    """Tests image resizing, checking output sizes."""
    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
-    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
    min_dim = 50
    max_dim = 100
    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
-    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+    expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
    for (in_image_shape, expected_image_shape, in_masks_shape,
         expected_mask_shape) in zip(in_image_shape_list,
                                     expected_image_shape_list,
                                     in_masks_shape_list,
                                     expected_masks_shape_list):
-      in_image = tf.random_uniform(in_image_shape)
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
      in_masks = tf.random_uniform(in_masks_shape)
      out_image, out_masks = preprocessor.resize_to_range(
          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
@@ -1462,38 +1471,15 @@ class PreprocessorTest(tf.test.TestCase):
      with self.test_session() as sess:
        out_image_shape, out_masks_shape = sess.run(
-            [out_image_shape, out_masks_shape])
+            [out_image_shape, out_masks_shape],
-        self.assertAllEqual(out_image_shape, expected_image_shape)
+            feed_dict={
-        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+                in_image: np.random.randn(*in_image_shape),
+                in_masks: np.random.randn(*in_masks_shape)
-  def testResizeImageWithMasks(self):
+            })
-    """Tests image resizing, checking output sizes."""
-    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
-    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
-    height = 50
-    width = 100
-    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
-    expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
-    for (in_image_shape, expected_image_shape, in_masks_shape,
-         expected_mask_shape) in zip(in_image_shape_list,
-                                     expected_image_shape_list,
-                                     in_masks_shape_list,
-                                     expected_masks_shape_list):
-      in_image = tf.random_uniform(in_image_shape)
-      in_masks = tf.random_uniform(in_masks_shape)
-      out_image, out_masks = preprocessor.resize_image(
-          in_image, in_masks, new_height=height, new_width=width)
-      out_image_shape = tf.shape(out_image)
-      out_masks_shape = tf.shape(out_masks)
-      with self.test_session() as sess:
-        out_image_shape, out_masks_shape = sess.run(
-            [out_image_shape, out_masks_shape])
        self.assertAllEqual(out_image_shape, expected_image_shape)
        self.assertAllEqual(out_masks_shape, expected_mask_shape)
-  def testResizeImageWithNoInstanceMask(self):
+  def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self):
    """Tests image resizing, checking output sizes."""
    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]