Object detection and slim changes (#5843)

Add more eval metrics to model_main and support group norm for mobilenet v1 based models.

Object detection and slim changes (#5843)
Add more eval metrics to model_main and support group norm for mobilenet v1 based models.
62ce5d2a · pkulzc · GitHub · 7d032ea3 · a1337e01 · 62ce5d2a
Unverified commit 62ce5d2a, authored Dec 13, 2018 by pkulzc; committed by GitHub on Dec 13, 2018.
6 changed files
--- a/research/slim/nets/nasnet/nasnet.py
+++ b/research/slim/nets/nasnet/nasnet.py
@@ -52,6 +52,7 @@ def cifar_config():
      # This is used for the drop path probabilities since it needs to increase
      # the drop out probability over the course of training.
      total_training_steps=937500,
+      use_bounded_activation=False,
  )
@@ -78,6 +79,7 @@ def large_imagenet_config():
      data_format='NHWC',
      skip_reduction_layer_input=1,
      total_training_steps=250000,
+      use_bounded_activation=False,
  )
@@ -104,6 +106,7 @@ def mobile_imagenet_config():
      data_format='NHWC',
      skip_reduction_layer_input=0,
      total_training_steps=250000,
+      use_bounded_activation=False,
  )
@@ -223,6 +226,7 @@ def nasnet_large_arg_scope(weight_decay=5e-5,
 def _build_aux_head(net, end_points, num_classes, hparams, scope):
  """Auxiliary head used for all models across all datasets."""
+  activation_fn = tf.nn.relu6 if hparams.use_bounded_activation else tf.nn.relu
  with tf.variable_scope(scope):
    aux_logits = tf.identity(net)
    with tf.variable_scope('aux_logits'):
@@ -230,7 +234,7 @@ def _build_aux_head(net, end_points, num_classes, hparams, scope):
          aux_logits, [5, 5], stride=3, padding='VALID')
      aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='proj')
      aux_logits = slim.batch_norm(aux_logits, scope='aux_bn0')
-      aux_logits = tf.nn.relu(aux_logits)
+      aux_logits = activation_fn(aux_logits)
      # Shape of feature map before the final layer.
      shape = aux_logits.shape
      if hparams.data_format == 'NHWC':
@@ -239,7 +243,7 @@ def _build_aux_head(net, end_points, num_classes, hparams, scope):
        shape = shape[2:4]
      aux_logits = slim.conv2d(aux_logits, 768, shape, padding='VALID')
      aux_logits = slim.batch_norm(aux_logits, scope='aux_bn1')
-      aux_logits = tf.nn.relu(aux_logits)
+      aux_logits = activation_fn(aux_logits)
      aux_logits = tf.contrib.layers.flatten(aux_logits)
      aux_logits = slim.fully_connected(aux_logits, num_classes)
      end_points['AuxLogits'] = aux_logits
@@ -306,10 +310,12 @@ def build_nasnet_cifar(images, num_classes,
  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
-      total_num_cells, hparams.total_training_steps)
+      total_num_cells, hparams.total_training_steps,
+      hparams.use_bounded_activation)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
-      total_num_cells, hparams.total_training_steps)
+      total_num_cells, hparams.total_training_steps,
+      hparams.use_bounded_activation)
  with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
                 is_training=is_training):
    with arg_scope([slim.avg_pool2d,
@@ -358,10 +364,12 @@ def build_nasnet_mobile(images, num_classes,
  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
-      total_num_cells, hparams.total_training_steps)
+      total_num_cells, hparams.total_training_steps,
+      hparams.use_bounded_activation)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
-      total_num_cells, hparams.total_training_steps)
+      total_num_cells, hparams.total_training_steps,
+      hparams.use_bounded_activation)
  with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
                 is_training=is_training):
    with arg_scope([slim.avg_pool2d,
@@ -411,10 +419,12 @@ def build_nasnet_large(images, num_classes,
  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
-      total_num_cells, hparams.total_training_steps)
+      total_num_cells, hparams.total_training_steps,
+      hparams.use_bounded_activation)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
-      total_num_cells, hparams.total_training_steps)
+      total_num_cells, hparams.total_training_steps,
+      hparams.use_bounded_activation)
  with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
                 is_training=is_training):
    with arg_scope([slim.avg_pool2d,
@@ -478,6 +488,7 @@ def _build_nasnet_base(images,
  filter_scaling = 1.0
  # true_cell_num accounts for the stem cells
  true_cell_num = 2 if stem_type == 'imagenet' else 0
+  activation_fn = tf.nn.relu6 if hparams.use_bounded_activation else tf.nn.relu
  for cell_num in range(hparams.num_cells):
    stride = 1
    if hparams.skip_reduction_layer_input:
@@ -513,14 +524,14 @@ def _build_nasnet_base(images,
    true_cell_num += 1
    if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and
        num_classes and is_training):
-      aux_net = tf.nn.relu(net)
+      aux_net = activation_fn(net)
      _build_aux_head(aux_net, end_points, num_classes, hparams,
                      scope='aux_{}'.format(cell_num))
    cell_outputs.append(net)
  # Final softmax layer
  with tf.variable_scope('final_layer'):
-    net = tf.nn.relu(net)
+    net = activation_fn(net)
    net = nasnet_utils.global_avg_pool(net)
    if add_and_check_endpoint('global_pool', net) or not num_classes:
      return net, end_points

--- a/research/slim/nets/nasnet/nasnet_test.py
+++ b/research/slim/nets/nasnet/nasnet_test.py
@@ -390,5 +390,21 @@ class NASNetTest(tf.test.TestCase):
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])
+  def testUseBoundedAcitvationCifarModel(self):
+    batch_size = 1
+    height, width = 32, 32
+    num_classes = 10
+    for use_bounded_activation in (True, False):
+      tf.reset_default_graph()
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      config = nasnet.cifar_config()
+      config.set_hparam('use_bounded_activation', use_bounded_activation)
+      with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
+        _, _ = nasnet.build_nasnet_cifar(
+            inputs, num_classes, config=config)
+      for node in tf.get_default_graph().as_graph_def().node:
+        if node.op.startswith('Relu'):
+          self.assertEqual(node.op == 'Relu6', use_bounded_activation)
 if __name__ == '__main__':
  tf.test.main()
--- a/research/slim/nets/nasnet/nasnet_utils.py
+++ b/research/slim/nets/nasnet/nasnet_utils.py
@@ -40,6 +40,9 @@ slim = tf.contrib.slim
 DATA_FORMAT_NCHW = 'NCHW'
 DATA_FORMAT_NHWC = 'NHWC'
 INVALID = 'null'
+# Cap used with tf.clip_by_value. The activation distribution suggests that the
+# majority of activation values fall in the range [-6, 6].
+CLIP_BY_VALUE_CAP = 6
 def calc_reduction_layers(num_cells, num_reduction_layers):
@@ -172,11 +175,13 @@ def _operation_to_info(operation):
  return num_layers, filter_shape
-def _stacked_separable_conv(net, stride, operation, filter_size):
+def _stacked_separable_conv(net, stride, operation, filter_size,
+                            use_bounded_activation):
  """Takes in an operations and parses it to the correct sep operation."""
  num_layers, kernel_size = _operation_to_info(operation)
+  activation_fn = tf.nn.relu6 if use_bounded_activation else tf.nn.relu
  for layer_num in range(num_layers - 1):
-    net = tf.nn.relu(net)
+    net = activation_fn(net)
    net = slim.separable_conv2d(
        net,
        filter_size,
@@ -187,7 +192,7 @@ def _stacked_separable_conv(net, stride, operation, filter_size):
    net = slim.batch_norm(
        net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
    stride = 1
-  net = tf.nn.relu(net)
+  net = activation_fn(net)
  net = slim.separable_conv2d(
      net,
      filter_size,
@@ -223,10 +228,12 @@ def _operation_to_pooling_info(operation):
  return pooling_type, pooling_shape
-def _pooling(net, stride, operation):
+def _pooling(net, stride, operation, use_bounded_activation):
  """Parses operation and performs the correct pooling operation on net."""
  padding = 'SAME'
  pooling_type, pooling_shape = _operation_to_pooling_info(operation)
+  if use_bounded_activation:
+    net = tf.nn.relu6(net)
  if pooling_type == 'avg':
    net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding=padding)
  elif pooling_type == 'max':
@@ -248,11 +255,13 @@ class NasNetABaseCell(object):
      should be concatenated together.
    hiddenstate_indices: Determines what hiddenstates should be combined
      together with the specified operations to create the NASNet cell.
+    use_bounded_activation: Whether or not to use bounded activations. Bounded
+      activations better lend themselves to quantized inference.
  """
  def __init__(self, num_conv_filters, operations, used_hiddenstates,
               hiddenstate_indices, drop_path_keep_prob, total_num_cells,
-               total_training_steps):
+               total_training_steps, use_bounded_activation=False):
    self._num_conv_filters = num_conv_filters
    self._operations = operations
    self._used_hiddenstates = used_hiddenstates
@@ -260,6 +269,7 @@ class NasNetABaseCell(object):
    self._drop_path_keep_prob = drop_path_keep_prob
    self._total_num_cells = total_num_cells
    self._total_training_steps = total_training_steps
+    self._use_bounded_activation = use_bounded_activation
  def _reduce_prev_layer(self, prev_layer, curr_layer):
    """Matches dimension of prev_layer to the curr_layer."""
@@ -270,12 +280,13 @@ class NasNetABaseCell(object):
    prev_num_filters = get_channel_dim(prev_layer.shape)
    curr_filter_shape = int(curr_layer.shape[2])
    prev_filter_shape = int(prev_layer.shape[2])
+    activation_fn = tf.nn.relu6 if self._use_bounded_activation else tf.nn.relu
    if curr_filter_shape != prev_filter_shape:
-      prev_layer = tf.nn.relu(prev_layer)
+      prev_layer = activation_fn(prev_layer)
      prev_layer = factorized_reduction(
          prev_layer, curr_num_filters, stride=2)
    elif curr_num_filters != prev_num_filters:
-      prev_layer = tf.nn.relu(prev_layer)
+      prev_layer = activation_fn(prev_layer)
      prev_layer = slim.conv2d(
          prev_layer, curr_num_filters, 1, scope='prev_1x1')
      prev_layer = slim.batch_norm(prev_layer, scope='prev_bn')
@@ -288,14 +299,11 @@ class NasNetABaseCell(object):
    # Check to be sure prev layer stuff is setup correctly
    prev_layer = self._reduce_prev_layer(prev_layer, net)
-    net = tf.nn.relu(net)
+    net = tf.nn.relu6(net) if self._use_bounded_activation else tf.nn.relu(net)
    net = slim.conv2d(net, num_filters, 1, scope='1x1')
    net = slim.batch_norm(net, scope='beginning_bn')
-    split_axis = get_channel_index()
+    # Wrap in a list, as tf.split with num_or_size_splits=1 would.
-    net = tf.split(axis=split_axis, num_or_size_splits=1, value=net)
+    net = [net]
-    for split in net:
-      assert int(split.shape[split_axis] == int(self._num_conv_filters *
-                                                self._filter_scaling))
    net.append(prev_layer)
    return net
@@ -335,6 +343,8 @@ class NasNetABaseCell(object):
          # Combine hidden states using 'add'.
          with tf.variable_scope('combine'):
            h = h1 + h2
+            if self._use_bounded_activation:
+              h = tf.nn.relu6(h)
          # Add hiddenstate to the list of hiddenstates we can choose from
          net.append(h)
@@ -353,18 +363,28 @@ class NasNetABaseCell(object):
    input_filters = get_channel_dim(net.shape)
    filter_size = self._filter_size
    if 'separable' in operation:
-      net = _stacked_separable_conv(net, stride, operation, filter_size)
+      net = _stacked_separable_conv(net, stride, operation, filter_size,
+                                    self._use_bounded_activation)
+      if self._use_bounded_activation:
+        net = tf.clip_by_value(net, -CLIP_BY_VALUE_CAP, CLIP_BY_VALUE_CAP)
    elif operation in ['none']:
+      if self._use_bounded_activation:
+        net = tf.nn.relu6(net)
      # Check if a stride is needed, then use a strided 1x1 here
      if stride > 1 or (input_filters != filter_size):
-        net = tf.nn.relu(net)
+        if not self._use_bounded_activation:
+          net = tf.nn.relu(net)
        net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
        net = slim.batch_norm(net, scope='bn_1')
+        if self._use_bounded_activation:
+          net = tf.clip_by_value(net, -CLIP_BY_VALUE_CAP, CLIP_BY_VALUE_CAP)
    elif 'pool' in operation:
-      net = _pooling(net, stride, operation)
+      net = _pooling(net, stride, operation, self._use_bounded_activation)
      if input_filters != filter_size:
        net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
        net = slim.batch_norm(net, scope='bn_1')
+      if self._use_bounded_activation:
+        net = tf.clip_by_value(net, -CLIP_BY_VALUE_CAP, CLIP_BY_VALUE_CAP)
    else:
      raise ValueError('Unimplemented operation', operation)
@@ -456,7 +476,7 @@ class NasNetANormalCell(NasNetABaseCell):
  """NASNetA Normal Cell."""
  def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells,
-               total_training_steps):
+               total_training_steps, use_bounded_activation=False):
    operations = ['separable_5x5_2',
                  'separable_3x3_2',
                  'separable_5x5_2',
@@ -474,14 +494,15 @@ class NasNetANormalCell(NasNetABaseCell):
                                            hiddenstate_indices,
                                            drop_path_keep_prob,
                                            total_num_cells,
-                                            total_training_steps)
+                                            total_training_steps,
+                                            use_bounded_activation)
 class NasNetAReductionCell(NasNetABaseCell):
  """NASNetA Reduction Cell."""
  def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells,
-               total_training_steps):
+               total_training_steps, use_bounded_activation=False):
    operations = ['separable_5x5_2',
                  'separable_7x7_2',
                  'max_pool_3x3',
@@ -499,4 +520,5 @@ class NasNetAReductionCell(NasNetABaseCell):
                                               hiddenstate_indices,
                                               drop_path_keep_prob,
                                               total_num_cells,
-                                               total_training_steps)
+                                               total_training_steps,
+                                               use_bounded_activation)
--- a/research/slim/nets/nasnet/pnasnet.py
+++ b/research/slim/nets/nasnet/pnasnet.py
@@ -45,6 +45,7 @@ def large_imagenet_config():
      data_format='NHWC',
      skip_reduction_layer_input=1,
      total_training_steps=250000,
+      use_bounded_activation=False,
  )
@@ -62,6 +63,7 @@ def mobile_imagenet_config():
      data_format='NHWC',
      skip_reduction_layer_input=1,
      total_training_steps=250000,
+      use_bounded_activation=False,
  )
@@ -114,6 +116,7 @@ def _build_pnasnet_base(images,
  filter_scaling = 1.0
  # true_cell_num accounts for the stem cells
  true_cell_num = 2
+  activation_fn = tf.nn.relu6 if hparams.use_bounded_activation else tf.nn.relu
  for cell_num in range(hparams.num_cells):
    is_reduction = cell_num in reduction_indices
    stride = 2 if is_reduction else 1
@@ -134,7 +137,7 @@ def _build_pnasnet_base(images,
    if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and
        num_classes and is_training):
-      aux_net = tf.nn.relu(net)
+      aux_net = activation_fn(net)
      # pylint: disable=protected-access
      nasnet._build_aux_head(aux_net, end_points, num_classes, hparams,
                             scope='aux_{}'.format(cell_num))
@@ -142,7 +145,7 @@ def _build_pnasnet_base(images,
  # Final softmax layer
  with tf.variable_scope('final_layer'):
-    net = tf.nn.relu(net)
+    net = activation_fn(net)
    net = nasnet_utils.global_avg_pool(net)
    if add_and_check_endpoint('global_pool', net) or not num_classes:
      return net, end_points
@@ -184,7 +187,8 @@ def build_pnasnet_large(images,
  normal_cell = PNasNetNormalCell(hparams.num_conv_filters,
                                  hparams.drop_path_keep_prob, total_num_cells,
-                                  hparams.total_training_steps)
+                                  hparams.total_training_steps,
+                                  hparams.use_bounded_activation)
  with arg_scope(
      [slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
      is_training=is_training):
@@ -231,7 +235,8 @@ def build_pnasnet_mobile(images,
  normal_cell = PNasNetNormalCell(hparams.num_conv_filters,
                                  hparams.drop_path_keep_prob, total_num_cells,
-                                  hparams.total_training_steps)
+                                  hparams.total_training_steps,
+                                  hparams.use_bounded_activation)
  with arg_scope(
      [slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
      is_training=is_training):
@@ -259,7 +264,7 @@ class PNasNetNormalCell(nasnet_utils.NasNetABaseCell):
  """PNASNet Normal Cell."""
  def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells,
-               total_training_steps):
+               total_training_steps, use_bounded_activation=False):
    # Configuration for the PNASNet-5 model.
    operations = [
        'separable_5x5_2', 'max_pool_3x3', 'separable_7x7_2', 'max_pool_3x3',
@@ -271,4 +276,5 @@ class PNasNetNormalCell(nasnet_utils.NasNetABaseCell):
    super(PNasNetNormalCell, self).__init__(
        num_conv_filters, operations, used_hiddenstates, hiddenstate_indices,
-        drop_path_keep_prob, total_num_cells, total_training_steps)
+        drop_path_keep_prob, total_num_cells, total_training_steps,
+        use_bounded_activation)
--- a/research/slim/nets/nasnet/pnasnet_test.py
+++ b/research/slim/nets/nasnet/pnasnet_test.py
@@ -236,6 +236,21 @@ class PNASNetTest(tf.test.TestCase):
    self.assertListEqual(end_points['Stem'].shape.as_list(),
                         [batch_size, 135, 28, 28])
+  def testUseBoundedAcitvationMobileModel(self):
+    batch_size = 1
+    height, width = 224, 224
+    num_classes = 1000
+    for use_bounded_activation in (True, False):
+      tf.reset_default_graph()
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      config = pnasnet.mobile_imagenet_config()
+      config.set_hparam('use_bounded_activation', use_bounded_activation)
+      with slim.arg_scope(pnasnet.pnasnet_mobile_arg_scope()):
+        _, _ = pnasnet.build_pnasnet_mobile(
+            inputs, num_classes, config=config)
+      for node in tf.get_default_graph().as_graph_def().node:
+        if node.op.startswith('Relu'):
+          self.assertEqual(node.op == 'Relu6', use_bounded_activation)
 if __name__ == '__main__':
  tf.test.main()
--- a/research/slim/nets/pix2pix.py
+++ b/research/slim/nets/pix2pix.py
@@ -90,7 +90,7 @@ def upsample(net, num_outputs, kernel_size, method='nn_upsample_conv'):
    net = layers.conv2d_transpose(
        net, num_outputs, [4, 4], stride=kernel_size, activation_fn=None)
  else:
-    raise ValueError('Unknown method: [%s]', method)
+    raise ValueError('Unknown method: [%s]' % method)
  return net
@@ -222,7 +222,8 @@ def pix2pix_generator(net,
  return logits, end_points
-def pix2pix_discriminator(net, num_filters, padding=2, is_training=False):
+def pix2pix_discriminator(net, num_filters, padding=2, pad_mode='REFLECT',
+                          activation_fn=tf.nn.leaky_relu, is_training=False):
  """Creates the Image2Image Translation Discriminator.
  Args:
@@ -231,6 +232,8 @@ def pix2pix_discriminator(net, num_filters, padding=2, is_training=False):
    num_filters: A list of the filters in the discriminator. The length of the
      list determines the number of layers in the discriminator.
    padding: Amount of reflection padding applied before each convolution.
+    pad_mode: Mode for tf.pad, one of "CONSTANT", "REFLECT", or "SYMMETRIC".
+    activation_fn: Activation function for layers.conv2d.
    is_training: Whether or not the model is training or testing.
  Returns:
@@ -249,7 +252,7 @@ def pix2pix_discriminator(net, num_filters, padding=2, is_training=False):
        spatial_pad = tf.constant(
            [[0, 0], [padding, padding], [padding, padding], [0, 0]],
            dtype=tf.int32)
-        return tf.pad(net, spatial_pad, 'REFLECT')
+        return tf.pad(net, spatial_pad, pad_mode)
    else:
      return net
@@ -258,7 +261,7 @@ def pix2pix_discriminator(net, num_filters, padding=2, is_training=False):
      kernel_size=[4, 4],
      stride=2,
      padding='valid',
-      activation_fn=tf.nn.leaky_relu):
+      activation_fn=activation_fn):
    # No normalization on the input layer.
    net = layers.conv2d(