Commit 629adffa authored by pkulzc, committed by Sergio Guadarrama

Internal changes for slim (#3448)

* Merged commit includes the following changes:
186565198  by Sergio Guadarrama:

    Applied random_hsv_in_yiq in inception_preprocessing.

--
186501039  by Sergio Guadarrama:

    Applied random_hsv_in_yiq in inception_preprocessing.

--
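    A minimal sketch of the kind of color distortion these two changes wire in,
    assuming TF 1.x with tf.contrib available (the parameter ranges below are
    illustrative, not the upstream ones):

        import tensorflow as tf

        def distort_color_in_yiq(image):
          # Randomly rotate hue and jitter saturation/value in YIQ space.
          return tf.contrib.image.random_hsv_in_yiq(
              image,
              max_delta_hue=0.2,
              lower_saturation=0.5, upper_saturation=1.5,
              lower_value=0.5, upper_value=1.5)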
186013907  by Sergio Guadarrama:

    Internal change

185715309  by Sergio Guadarrama:

    Obviates the need for prepadding on mobilenet v1 and v2 for fully convolutional models.

--
184266252  by Sergio Guadarrama:

    Give build_nasnet_*() functions an optional flag use_aux_head,
    and add an internal-only arg scope to NasNetA*Cell._apply_drop_path().

--
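    A sketch of what the new flag allows (hypothetical call, following the
    description above; the aux head is a small auxiliary classifier branch used
    only to aid training):

        logits, end_points = nasnet.build_nasnet_mobile(
            images, num_classes=1001, use_aux_head=False)
        # With use_aux_head=False, no 'AuxLogits' endpoint is created.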
183865228  by Sergio Guadarrama:

    Internal change

179580924  by Sergio Guadarrama:

    Internal change

177320302  by Sergio Guadarrama:

    Internal change

177130184  by Sergio Guadarrama:

    Make slim nets tests faster by using smaller examples of oversized inputs.

--
176965289  by Sergio Guadarrama:

    Internal change

176585260  by Sergio Guadarrama:

    Internal change

176534973  by Sergio Guadarrama:

    Internal change

175526881  by Sergio Guadarrama:

    Internal change

174967704  by Sergio Guadarrama:

    Treat num_classes=0 the same as None in a few slim nets overlooked by the
    recent change.

--
174443227  by Sergio Guadarrama:

    Internal change

174281864  by Sergio Guadarrama:

    Internal change

174249903  by Sergio Guadarrama:

    Fix nasnet image classification and object detection by moving the option
    to turn batch norm training ON or OFF into its own arg_scope used only by
    detection.

--
173954505  by Sergio Guadarrama:

    Merge pull request #2651 from sguada/tmp1

    Fixes imports

    Closes #2636

    ORIGINAL_AUTHOR=Jon Shlens <shlens@users.noreply.github.com>
    COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/models/pull/2636 from tensorflow:sguada-patch-1 19ff570f52df5ab655c00fb439129b201c5f2dce

--
173928094  by Sergio Guadarrama:

    Remove pending imports

--

PiperOrigin-RevId: 186565198

* Remove internal links.
parent 599521ef
@@ -124,6 +124,7 @@ def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
 @slim.add_arg_scope
 def stack_blocks_dense(net, blocks, output_stride=None,
+                       store_non_strided_activations=False,
                        outputs_collections=None):
   """Stacks ResNet `Blocks` and controls output feature density.
@@ -154,6 +155,12 @@ def stack_blocks_dense(net, blocks, output_stride=None,
       For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
       then valid values for the output_stride are 1, 2, 6, 24 or None (which
       is equivalent to output_stride=24).
+    store_non_strided_activations: If True, we compute non-strided (undecimated)
+      activations at the last unit of each block and store them in the
+      `outputs_collections` before subsampling them. This gives us access to
+      higher-resolution intermediate activations, which are useful in some
+      dense prediction problems, but increases the computation and memory
+      cost at the last unit of each block by 4x.
     outputs_collections: Collection to add the ResNet block outputs.

   Returns:
@@ -173,9 +180,12 @@ def stack_blocks_dense(net, blocks, output_stride=None,
   for block in blocks:
     with tf.variable_scope(block.scope, 'block', [net]) as sc:
+      block_stride = 1
       for i, unit in enumerate(block.args):
-        if output_stride is not None and current_stride > output_stride:
-          raise ValueError('The target output_stride cannot be reached.')
+        if store_non_strided_activations and i == len(block.args) - 1:
+          # Move stride from the block's last unit to the end of the block.
+          block_stride = unit.get('stride', 1)
+          unit = dict(unit, stride=1)

         with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
           # If we have reached the target output_stride, then we need to employ
@@ -188,8 +198,21 @@ def stack_blocks_dense(net, blocks, output_stride=None,
           else:
             net = block.unit_fn(net, rate=1, **unit)
             current_stride *= unit.get('stride', 1)
+            if output_stride is not None and current_stride > output_stride:
+              raise ValueError('The target output_stride cannot be reached.')

+      # Collect activations at the block's end before performing subsampling.
       net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)

+      # Subsampling of the block's output activations.
+      if output_stride is not None and current_stride == output_stride:
+        rate *= block_stride
+      else:
+        net = subsample(net, block_stride)
+        current_stride *= block_stride
+        if output_stride is not None and current_stride > output_stride:
+          raise ValueError('The target output_stride cannot be reached.')
+
   if output_stride is not None and current_stride != output_stride:
     raise ValueError('The target output_stride cannot be reached.')
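A quick usage sketch of the new flag via the resnet_v1 builders changed below
(hypothetical call; the endpoint key shown is illustrative):

    net, end_points = resnet_v1.resnet_v1_50(
        inputs, num_classes=None, global_pool=False,
        store_non_strided_activations=True)
    # end_points now also holds each block's full-resolution activations,
    # collected before subsampling, e.g. end_points['resnet_v1_50/block3'].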
@@ -137,6 +137,7 @@ def resnet_v1(inputs,
               output_stride=None,
               include_root_block=True,
               spatial_squeeze=True,
+              store_non_strided_activations=False,
               reuse=None,
               scope=None):
   """Generator for v1 ResNet models.
@@ -181,6 +182,12 @@ def resnet_v1(inputs,
       To use this parameter, the input images must be smaller than 300x300
       pixels, in which case the output logit layer does not contain spatial
       information and can be removed.
+    store_non_strided_activations: If True, we compute non-strided (undecimated)
+      activations at the last unit of each block and store them in the
+      `outputs_collections` before subsampling them. This gives us access to
+      higher-resolution intermediate activations, which are useful in some
+      dense prediction problems, but increases the computation and memory
+      cost at the last unit of each block by 4x.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.
@@ -213,7 +220,8 @@ def resnet_v1(inputs,
           output_stride /= 4
         net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
         net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
-      net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
+      net = resnet_utils.stack_blocks_dense(net, blocks, output_stride,
+                                            store_non_strided_activations)
       # Convert end_points_collection into a dictionary of end_points.
       end_points = slim.utils.convert_collection_to_dict(
           end_points_collection)
@@ -264,6 +272,7 @@ def resnet_v1_50(inputs,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
+                 store_non_strided_activations=False,
                  reuse=None,
                  scope='resnet_v1_50'):
   """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
@@ -276,6 +285,7 @@ def resnet_v1_50(inputs,
   return resnet_v1(inputs, blocks, num_classes, is_training,
                    global_pool=global_pool, output_stride=output_stride,
                    include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   store_non_strided_activations=store_non_strided_activations,
                    reuse=reuse, scope=scope)

 resnet_v1_50.default_image_size = resnet_v1.default_image_size
@@ -286,6 +296,7 @@ def resnet_v1_101(inputs,
                   global_pool=True,
                   output_stride=None,
                   spatial_squeeze=True,
+                  store_non_strided_activations=False,
                   reuse=None,
                   scope='resnet_v1_101'):
   """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
@@ -298,6 +309,7 @@ def resnet_v1_101(inputs,
   return resnet_v1(inputs, blocks, num_classes, is_training,
                    global_pool=global_pool, output_stride=output_stride,
                    include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   store_non_strided_activations=store_non_strided_activations,
                    reuse=reuse, scope=scope)

 resnet_v1_101.default_image_size = resnet_v1.default_image_size
@@ -307,6 +319,7 @@ def resnet_v1_152(inputs,
                   is_training=True,
                   global_pool=True,
                   output_stride=None,
+                  store_non_strided_activations=False,
                   spatial_squeeze=True,
                   reuse=None,
                   scope='resnet_v1_152'):
@@ -320,6 +333,7 @@ def resnet_v1_152(inputs,
   return resnet_v1(inputs, blocks, num_classes, is_training,
                    global_pool=global_pool, output_stride=output_stride,
                    include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   store_non_strided_activations=store_non_strided_activations,
                    reuse=reuse, scope=scope)

 resnet_v1_152.default_image_size = resnet_v1.default_image_size
@@ -329,6 +343,7 @@ def resnet_v1_200(inputs,
                   is_training=True,
                   global_pool=True,
                   output_stride=None,
+                  store_non_strided_activations=False,
                   spatial_squeeze=True,
                   reuse=None,
                   scope='resnet_v1_200'):
@@ -342,5 +357,6 @@ def resnet_v1_200(inputs,
   return resnet_v1(inputs, blocks, num_classes, is_training,
                    global_pool=global_pool, output_stride=output_stride,
                    include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   store_non_strided_activations=store_non_strided_activations,
                    reuse=reuse, scope=scope)
 resnet_v1_200.default_image_size = resnet_v1.default_image_size
@@ -185,7 +185,7 @@ class ResnetUtilsTest(tf.test.TestCase):
                 'tiny/block2/unit_2/bottleneck_v1/conv1',
                 'tiny/block2/unit_2/bottleneck_v1/conv2',
                 'tiny/block2/unit_2/bottleneck_v1/conv3']
-    self.assertItemsEqual(expected, end_points)
+    self.assertItemsEqual(expected, end_points.keys())

   def _stack_blocks_nondense(self, net, blocks):
     """A simplified ResNet Block stacker without output stride control."""
@@ -240,6 +240,70 @@ class ResnetUtilsTest(tf.test.TestCase):
         output, expected = sess.run([output, expected])
         self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)

+  def testStridingLastUnitVsSubsampleBlockEnd(self):
+    """Compares subsampling at the block's last unit or block's end.
+
+    Makes sure that the final output is the same when we use a stride at the
+    last unit of a block vs. when we subsample activations at the end of a
+    block.
+    """
+    block = resnet_v1.resnet_v1_block
+    blocks = [
+        block('block1', base_depth=1, num_units=2, stride=2),
+        block('block2', base_depth=2, num_units=2, stride=2),
+        block('block3', base_depth=4, num_units=2, stride=2),
+        block('block4', base_depth=8, num_units=2, stride=1),
+    ]
+
+    # Test both odd and even input dimensions.
+    height = 30
+    width = 31
+    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
+      with slim.arg_scope([slim.batch_norm], is_training=False):
+        for output_stride in [1, 2, 4, 8, None]:
+          with tf.Graph().as_default():
+            with self.test_session() as sess:
+              tf.set_random_seed(0)
+              inputs = create_test_input(1, height, width, 3)
+
+              # Subsampling at the last unit of the block.
+              output = resnet_utils.stack_blocks_dense(
+                  inputs, blocks, output_stride,
+                  store_non_strided_activations=False,
+                  outputs_collections='output')
+              output_end_points = slim.utils.convert_collection_to_dict(
+                  'output')
+
+              # Make the two networks use the same weights.
+              tf.get_variable_scope().reuse_variables()
+
+              # Subsample activations at the end of the blocks.
+              expected = resnet_utils.stack_blocks_dense(
+                  inputs, blocks, output_stride,
+                  store_non_strided_activations=True,
+                  outputs_collections='expected')
+              expected_end_points = slim.utils.convert_collection_to_dict(
+                  'expected')
+
+              sess.run(tf.global_variables_initializer())
+
+              # Make sure that the final output is the same.
+              output, expected = sess.run([output, expected])
+              self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
+
+              # Make sure that intermediate block activations in
+              # output_end_points are subsampled versions of the corresponding
+              # ones in expected_end_points.
+              for i, block in enumerate(blocks[:-1]):
+                output = output_end_points[block.scope]
+                expected = expected_end_points[block.scope]
+                atrous_activated = (output_stride is not None and
+                                    2 ** i >= output_stride)
+                if not atrous_activated:
+                  expected = resnet_utils.subsample(expected, 2)
+                output, expected = sess.run([output, expected])
+                self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
+

 class ResnetCompleteNetworkTest(tf.test.TestCase):
   """Tests with complete small ResNet v1 networks."""
@@ -212,7 +212,7 @@ def resnet_v2(inputs,
         # Global average pooling.
         net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
         end_points['global_pool'] = net
-      if num_classes is not None:
+      if num_classes:
         net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                           normalizer_fn=None, scope='logits')
         end_points[sc.name + '/logits'] = net
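A quick usage note (hypothetical call): with this change, num_classes=0 now
behaves like num_classes=None and skips the logits layer entirely:

    net, _ = resnet_v2.resnet_v2_50(inputs, num_classes=0, global_pool=True)
    # net is the pooled feature map of shape [batch, 1, 1, 2048];
    # no 'logits' endpoint or variables are created.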
@@ -185,7 +185,7 @@ class ResnetUtilsTest(tf.test.TestCase):
                 'tiny/block2/unit_2/bottleneck_v2/conv1',
                 'tiny/block2/unit_2/bottleneck_v2/conv2',
                 'tiny/block2/unit_2/bottleneck_v2/conv3']
-    self.assertItemsEqual(expected, end_points)
+    self.assertItemsEqual(expected, end_points.keys())

   def _stack_blocks_nondense(self, net, blocks):
     """A simplified ResNet Block stacker without output stride control."""
@@ -212,7 +212,7 @@ def vgg_16(inputs,
                           activation_fn=None,
                           normalizer_fn=None,
                           scope='fc8')
-        if spatial_squeeze and num_classes is not None:
+        if spatial_squeeze:
           net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
         end_points[sc.name + '/fc8'] = net
       return net, end_points
@@ -226,11 +226,12 @@ def preprocess_for_train(image, height, width, bbox,
     # Randomly flip the image horizontally.
     distorted_image = tf.image.random_flip_left_right(distorted_image)

-    # Randomly distort the colors. There are 4 ways to do it.
+    # Randomly distort the colors. There are 1 or 4 ways to do it.
+    num_distort_cases = 1 if fast_mode else 4
     distorted_image = apply_with_random_selector(
         distorted_image,
         lambda x, ordering: distort_color(x, ordering, fast_mode),
-        num_cases=4)
+        num_cases=num_distort_cases)

     if add_image_summaries:
       tf.summary.image('final_distorted_image',
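For context, a sketch of the selector helper this hunk calls (paraphrased from
inception_preprocessing.py; with num_cases=1 in fast mode, only the cheap
distortion ordering is ever applied):

    # control_flow_ops comes from tensorflow.python.ops in the real file.
    def apply_with_random_selector(x, func, num_cases):
      """Computes func(x, sel) with sel sampled uniformly from [0, num_cases)."""
      sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
      # Pass the real x only to the branch whose case matches sel.
      return control_flow_ops.merge([
          func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
          for case in range(num_cases)])[0]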
@@ -253,8 +253,8 @@ def _smallest_size_at_least(height, width, smallest_side):
   scale = tf.cond(tf.greater(height, width),
                   lambda: smallest_side / width,
                   lambda: smallest_side / height)
-  new_height = tf.to_int32(height * scale)
-  new_width = tf.to_int32(width * scale)
+  new_height = tf.to_int32(tf.rint(height * scale))
+  new_width = tf.to_int32(tf.rint(width * scale))
   return new_height, new_width
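A hypothetical repro of the truncation issue this fixes (plain Python,
mirroring the graph math; whether truncation bites depends on float rounding):

    height, width, smallest_side = 600.0, 800.0, 256.0
    scale = smallest_side / height          # inexact in binary floating point
    # height * scale can land just below 256.0 (e.g. 255.99999999999997);
    # int() truncation then yields 255, breaking the smallest-side contract,
    # while round() -- matching tf.rint -- gives 256.
    print(int(height * scale), int(round(height * scale)))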