Fix ResNet model convergence problem (#6721)

a182abc1 · Haoyu Zhang · Toby Boyd · 58deb059 · a182abc1 · a182abc1
Commit a182abc1 authored May 06, 2019 by Haoyu Zhang Committed by Toby Boyd May 06, 2019
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 11 deletions

official/resnet/keras/keras_common.py official/resnet/keras/keras_common.py +1 -1

official/resnet/keras/resnet_model.py official/resnet/keras/resnet_model.py +1 -10

No files found.
--- a/official/resnet/keras/keras_common.py
+++ b/official/resnet/keras/keras_common.py
@@ -368,7 +368,7 @@ def set_cudnn_batchnorm_mode():
  if FLAGS.batchnorm_spatial_persistent:
    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
  else:
-    del os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT']
+    os.environ.pop('TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT', None)


 def _monkey_patch_org_assert_broadcastable():

--- a/official/resnet/keras/resnet_model.py
+++ b/official/resnet/keras/resnet_model.py
@@ -69,7 +69,6 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
@@ -81,7 +80,6 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
@@ -92,7 +90,6 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)
@@ -139,7 +136,6 @@ def conv_block(input_tensor,
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
@@ -150,7 +146,6 @@ def conv_block(input_tensor,
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
@@ -161,7 +156,6 @@ def conv_block(input_tensor,
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)
@@ -171,7 +165,6 @@ def conv_block(input_tensor,
                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                           name=conv_name_base + '1')(input_tensor)
  shortcut = layers.BatchNormalization(axis=bn_axis,
-                                       scale=False,
                                       momentum=BATCH_NORM_DECAY,
                                       epsilon=BATCH_NORM_EPSILON,
                                       name=bn_name_base + '1')(shortcut)
@@ -211,13 +204,11 @@ def resnet50(num_classes, dtype='float32', batch_size=None):
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name='conv1')(x)
  x = layers.BatchNormalization(axis=bn_axis,
-                                scale=False,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name='bn_conv1')(x)
  x = layers.Activation('relu')(x)
-  x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
-  x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
+  x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')