Remove batch norm weight decay + a few other fixes (#2755)

178480ed · Neal Wu · GitHub · 4c37264d · 178480ed · 178480ed
Unverified commit 178480ed, authored Nov 09, 2017 by Neal Wu; committed by GitHub on Nov 09, 2017
Show whitespace changes
Inline Side-by-side

Showing with 9 additions and 7 deletions

official/resnet/imagenet_main.py +4 -3

official/resnet/resnet_model.py +5 -4

No files found.
--- a/official/resnet/imagenet_main.py
+++ b/official/resnet/imagenet_main.py
@@ -184,10 +184,11 @@ def resnet_model_fn(features, labels, mode, params):
  tf.identity(cross_entropy, name='cross_entropy')
  tf.summary.scalar('cross_entropy', cross_entropy)

-  # Add weight decay to the loss. We perform weight decay on all trainable
-  # variables, which includes batch norm beta and gamma variables.
+  # Add weight decay to the loss. We exclude the batch norm variables because
+  # doing so leads to a small improvement in accuracy.
  loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
-      [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
+      [tf.nn.l2_loss(v) for v in tf.trainable_variables()
+       if 'batch_normalization' not in v.name])

  if mode == tf.estimator.ModeKeys.TRAIN:
    # Scale the learning rate linearly with the batch size. When the batch size

--- a/official/resnet/resnet_model.py
+++ b/official/resnet/resnet_model.py
@@ -242,8 +242,8 @@ def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None):
  def model(inputs, is_training):
    """Constructs the ResNet model given the inputs."""
    if data_format == 'channels_first':
-      # Convert from channels_last (NHWC) to channels_first (NCHW). This
-      # provides a large performance boost on GPU. See
+      # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
+      # This provides a large performance boost on GPU. See
      # https://www.tensorflow.org/performance/performance_guide#data_formats
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

@@ -302,8 +302,9 @@ def imagenet_resnet_v2_generator(block_fn, layers, num_classes,
  def model(inputs, is_training):
    """Constructs the ResNet model given the inputs."""
    if data_format == 'channels_first':
-      # Convert from channels_last (NHWC) to channels_first (NCHW). This
-      # provides a large performance boost on GPU.
+      # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
+      # This provides a large performance boost on GPU. See
+      # https://www.tensorflow.org/performance/performance_guide#data_formats
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    inputs = conv2d_fixed_padding(