Unverified Commit 97622ffc authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

[ResNet / NCF] Test force V1 path and allow V2 path as default (#7383)

* force_v2_in_keras_compile FLAG default to None and added separate temp path.

* switch to force testing v1 path, not force v2 path.

* Rename function force_v1_path.
parent c07abee7
......@@ -136,10 +136,10 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2_early_stop(self):
def benchmark_1_gpu_force_v1_path_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_early_stop(self):
......@@ -148,11 +148,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_early_stop(self):
def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
......@@ -168,11 +168,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_force_v2_early_stop(self):
def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.enable_xla = True
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_early_stop(self):
......@@ -194,13 +194,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.num_gpus = 2
self._run_and_report_benchmark()
def benchmark_2_gpus_early_stop_force_V2(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_2_gpus_ctl_early_stop(self):
"""NCF with custom training loop. Works only in TF 2.0."""
self._setup()
......@@ -225,12 +218,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_force_v2_mlperf_like(self):
def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
"""1 GPU using compile/fit without dist_strat."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
......@@ -281,8 +274,8 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_force_v2_mlperf_like(self):
"""8 GPU using keras fit/compile V2 codepath."""
def benchmark_8_gpu_force_v1_path_mlperf_like(self):
"""8 GPU using keras fit/compile v1 codepath."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.train_epochs = 17
......@@ -291,7 +284,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_8_gpu_mlperf_like(self):
......
......@@ -369,11 +369,17 @@ def run_ncf(_):
else:
with distribution_utils.get_strategy_scope(strategy):
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if FLAGS.force_v2_in_keras_compile is not None:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
else:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly)
history = keras_model.fit(
train_input_dataset,
......
......@@ -75,19 +75,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2(self):
"""Test keras based model with eager, DS, and force_v2 path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_cpu(self):
"""Test keras based model on CPU."""
self._setup()
......@@ -115,22 +102,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_force_v2(self):
"""Keras on CPU without dist strat but with force v2 in keras.compile."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.data_format = 'channels_last'
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_run_eagerly(self):
"""Test keras based model on CPU w/forced eager and no dist_strat."""
self._setup()
......@@ -188,8 +159,8 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self):
"""No dist strat but forced v2 execution path."""
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat forced v1 execution path."""
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.num_gpus = 1
......@@ -197,26 +168,10 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2')
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2_run_eagerly(self):
"""No dist strat but forced v2 path via tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_v2_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
......@@ -299,15 +254,15 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2(self):
"""Test 1 gpu using forced v2 execution path."""
def benchmark_1_gpu_force_v1_path(self):
"""Test 1 gpu using forced v1 execution path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
......@@ -353,31 +308,31 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self):
"""No dist strat but forced v2 execution path."""
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v1 execution path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2')
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self):
"""Forced v2 execution path and forced eager."""
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v1 execution path and forced eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly')
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
......@@ -444,17 +399,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_force_v2(self):
"""Test cpu without dist strat and force v2 in model.compile."""
def benchmark_cpu_no_dist_strat_force_v1_path(self):
"""Test cpu without dist strat and force v1 in model.compile."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_force_v2')
'benchmark_cpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_graph_cpu_no_dist_strat(self):
......
......@@ -153,13 +153,23 @@ def run(flags_obj):
optimizer = keras_common.get_optimizer()
model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_crossentropy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_crossentropy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
callbacks = keras_common.get_callbacks(
learning_rate_schedule, cifar_preprocessing.NUM_IMAGES['train'])
......
......@@ -79,23 +79,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.use_tensor_lr = True
self._run_and_report_benchmark()
def benchmark_8_gpu_force_v2(self):
"""Test Keras model with eager, dist_strat, force v2 and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
# Add some thread tunings to improve performance.
FLAGS.datasets_num_private_threads = 14
FLAGS.use_tensor_lr = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
self._setup()
......@@ -294,8 +277,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self):
"""Forced v2 execution in tf.compile path and force eager."""
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v1 execution in tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
......@@ -303,13 +286,13 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat_run_eagerly')
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly_tweaked(self):
"""Forced v2 execution in tf.compile path and force eager."""
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked(self):
"""Forced v1 execution in tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
......@@ -318,22 +301,22 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.explicit_gpu_placement = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat_run_eagerly_tweaked')
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked')
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self):
"""No dist strat but forced v2 execution tf.compile path."""
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v1 execution tf.compile path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat')
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
......@@ -572,18 +555,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_force_v2(self):
"""Test Keras model with 8 GPUs and v2 codepath."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_force_v2')
FLAGS.batch_size = 128 * 8 # 8 GPUs
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_8_gpu_tweaked(self):
"""Test Keras model with manual config tuning and 8 GPUs."""
self._setup()
......
......@@ -189,13 +189,23 @@ def run(flags_obj):
model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES, dtype=dtype)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
callbacks = keras_common.get_callbacks(
learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train'])
......
......@@ -282,7 +282,7 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
if force_v2_in_keras_compile:
flags.DEFINE_boolean(
name="force_v2_in_keras_compile", default=False,
name="force_v2_in_keras_compile", default=None,
help="Forces the use of run_distribued path even if not"
"using a `strategy`. This is not the same as"
"`tf.distribute.OneDeviceStrategy`")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment