Merge pull request #2099 from derekjchow/inception

Update slim/nets/inception*

Merge pull request #2099 from derekjchow/inception
Update slim/nets/inception*
f4b94e73 · Jonathan Huang · GitHub · 0839edea · 6b43f524 · f4b94e73
Commit f4b94e73, authored Aug 03, 2017 by Jonathan Huang; committed by GitHub on Aug 03, 2017
5 changed files
--- a/slim/nets/inception_resnet_v2.py
+++ b/slim/nets/inception_resnet_v2.py
@@ -331,7 +331,7 @@ inception_resnet_v2.default_image_size = 299
 def inception_resnet_v2_arg_scope(weight_decay=0.00004,
                                  batch_norm_decay=0.9997,
                                  batch_norm_epsilon=0.001):
-  """Yields the scope with the default parameters for inception_resnet_v2.
+  """Returns the scope with the default parameters for inception_resnet_v2.

  Args:
    weight_decay: the weight decay for weights variables.

--- a/slim/nets/inception_v1.py
+++ b/slim/nets/inception_v1.py
@@ -93,7 +93,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -110,7 +111,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -132,7 +134,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -149,7 +152,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -166,7 +170,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -183,7 +188,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -200,7 +206,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -222,7 +229,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

@@ -239,7 +247,8 @@ def inception_v1_base(inputs,
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(
+              axis=3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
@@ -270,8 +279,8 @@ def inception_v1(inputs,
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
-    spatial_squeeze: if True, logits is of shape [B, C], if false logits is
-        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
+        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

--- a/slim/nets/inception_v2.py
+++ b/slim/nets/inception_v2.py
@@ -30,6 +30,8 @@ def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
+                      use_separable_conv=True,
+                      data_format='NHWC',
                      scope=None):
  """Inception v2 (6a2).

@@ -51,6 +53,9 @@ def inception_v2_base(inputs,
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
+    use_separable_conv: Use a separable convolution for the first layer
+      Conv2d_1a_7x7. If this is False, use a normal convolution instead.
+    data_format: Data format of the activations ('NHWC' or 'NCHW').
    scope: Optional variable_scope.

  Returns:
@@ -72,16 +77,29 @@ def inception_v2_base(inputs,
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

+  if data_format != 'NHWC' and data_format != 'NCHW':
+    raise ValueError('data_format must be either NHWC or NCHW.')
+  if data_format == 'NCHW' and use_separable_conv:
+    raise ValueError(
+        'separable convolution only supports NHWC layout. NCHW data format can'
+        ' only be used when use_separable_conv is False.'
+    )
+
+  concat_dim = 3 if data_format == 'NHWC' else 1
  with tf.variable_scope(scope, 'InceptionV2', [inputs]):
    with slim.arg_scope(
-        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d, slim.separable_conv2d],
-        stride=1, padding='SAME'):
+        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
+        stride=1,
+        padding='SAME',
+        data_format=data_format):

      # Note that sizes in the comments below assume an input spatial size of
      # 224x224, however, the inputs can be of any size greater 32x32.

      # 224 x 224 x 3
      end_point = 'Conv2d_1a_7x7'
+
+      if use_separable_conv:
        # depthwise_multiplier here is different from depth_multiplier.
        # depthwise_multiplier determines the output channels of the initial
        # depthwise conv (see docs for tf.nn.separable_conv2d), while
@@ -91,8 +109,19 @@ def inception_v2_base(inputs,
        # so that the separable convolution is not overparameterized.
        depthwise_multiplier = min(int(depth(64) / 3), 8)
        net = slim.separable_conv2d(
-          inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier,
-          stride=2, weights_initializer=trunc_normal(1.0),
+            inputs, depth(64), [7, 7],
+            depth_multiplier=depthwise_multiplier,
+            stride=2,
+            padding='SAME',
+            weights_initializer=trunc_normal(1.0),
+            scope=end_point)
+      else:
+        # Use a normal convolution instead of a separable convolution.
+        net = slim.conv2d(
+            inputs,
+            depth(64), [7, 7],
+            stride=2,
+            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
@@ -145,7 +174,8 @@ def inception_v2_base(inputs,
              branch_3, depth(32), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 256
@@ -175,7 +205,8 @@ def inception_v2_base(inputs,
              branch_3, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 320
@@ -200,7 +231,7 @@ def inception_v2_base(inputs,
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
+        net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
@@ -230,7 +261,8 @@ def inception_v2_base(inputs,
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
@@ -260,7 +292,8 @@ def inception_v2_base(inputs,
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
@@ -290,10 +323,10 @@ def inception_v2_base(inputs,
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
-
      # 14 x 14 x 576
      end_point = 'Mixed_4e'
      with tf.variable_scope(end_point):
@@ -321,7 +354,8 @@ def inception_v2_base(inputs,
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
@@ -346,7 +380,8 @@ def inception_v2_base(inputs,
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                     scope='MaxPool_1a_3x3')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
@@ -376,10 +411,10 @@ def inception_v2_base(inputs,
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
-
      # 7 x 7 x 1024
      end_point = 'Mixed_5c'
      with tf.variable_scope(end_point):
@@ -407,7 +442,8 @@ def inception_v2_base(inputs,
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
-        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
@@ -443,8 +479,8 @@ def inception_v2(inputs,
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
-    spatial_squeeze: if True, logits is of shape [B, C], if false logits is
-        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
+        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
@@ -504,7 +540,7 @@ def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
-      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
+      return = tf.stack([tf.minimum(shape[1], kernel_size[0]),
                         tf.minimum(shape[2], kernel_size[1])])

  """

--- a/slim/nets/inception_v2_test.py
+++ b/slim/nets/inception_v2_test.py
@@ -164,6 +164,68 @@ class InceptionV2Test(tf.test.TestCase):
    with self.assertRaises(ValueError):
      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0)

+  def testBuildEndPointsWithUseSeparableConvolutionFalse(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.inception_v2_base(inputs)
+
+    endpoint_keys = [
+        key for key in end_points.keys()
+        if key.startswith('Mixed') or key.startswith('Conv')
+    ]
+
+    _, end_points_with_replacement = inception.inception_v2_base(
+        inputs, use_separable_conv=False)
+
+    # The endpoint shapes must be equal to the original shape even when the
+    # separable convolution is replaced with a normal convolution.
+    for key in endpoint_keys:
+      original_shape = end_points[key].get_shape().as_list()
+      self.assertTrue(key in end_points_with_replacement)
+      new_shape = end_points_with_replacement[key].get_shape().as_list()
+      self.assertListEqual(original_shape, new_shape)
+
+  def testBuildEndPointsNCHWDataFormat(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.inception_v2_base(inputs)
+
+    endpoint_keys = [
+        key for key in end_points.keys()
+        if key.startswith('Mixed') or key.startswith('Conv')
+    ]
+
+    inputs_in_nchw = tf.random_uniform((batch_size, 3, height, width))
+    _, end_points_with_replacement = inception.inception_v2_base(
+        inputs_in_nchw, use_separable_conv=False, data_format='NCHW')
+
+    # With the 'NCHW' data format, all endpoint activations have a transposed
+    # shape from the original shape with the 'NHWC' layout.
+    for key in endpoint_keys:
+      transposed_original_shape = tf.transpose(
+          end_points[key], [0, 3, 1, 2]).get_shape().as_list()
+      self.assertTrue(key in end_points_with_replacement)
+      new_shape = end_points_with_replacement[key].get_shape().as_list()
+      self.assertListEqual(transposed_original_shape, new_shape)
+
+  def testBuildErrorsForDataFormats(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+
+    # 'NCWH' data format is not supported.
+    with self.assertRaises(ValueError):
+      _ = inception.inception_v2_base(inputs, data_format='NCWH')
+
+    # 'NCHW' data format is not supported for separable convolution.
+    with self.assertRaises(ValueError):
+      _ = inception.inception_v2_base(inputs, data_format='NCHW')
+
  def testHalfSizeImages(self):
    batch_size = 5
    height, width = 112, 112

--- a/slim/nets/inception_v3.py
+++ b/slim/nets/inception_v3.py
@@ -454,8 +454,8 @@ def inception_v3(inputs,
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
-    spatial_squeeze: if True, logits is of shape [B, C], if false logits is
-        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
+        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    create_aux_logits: Whether to create the auxiliary logits.
@@ -547,7 +547,7 @@ def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
-      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
+      return = tf.stack([tf.minimum(shape[1], kernel_size[0]),
                         tf.minimum(shape[2], kernel_size[1])])

  """