Commit d3d7f15f authored by Hongkun Yu, committed by A. Unique TensorFlower

Remove force_v2_in_keras_compile. experimental_run_tf_function is a no-op now.

PiperOrigin-RevId: 299160422
parent 12271d7c
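
For context, the cleanup this commit applies at every call site can be sketched as follows. This is an illustrative toy example, not code from the commit (the Sequential model and its hyperparameters are made up); it assumes TF 2.x, where tf.keras treats experimental_run_tf_function as a no-op, so the flag-driven branch around model.compile() collapses to a single call:

    # Illustrative sketch only (assumes TF 2.x); the toy model below is not
    # from this commit. Before the cleanup, call sites branched on the
    # force_v2_in_keras_compile flag and forwarded it to compile() as
    # experimental_run_tf_function; since tf.keras now ignores that argument,
    # the plain call is equivalent.
    import tensorflow as tf

    model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
    model.compile(
        loss='mse',
        optimizer='sgd',
        run_eagerly=False)  # experimental_run_tf_function no longer passed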
@@ -163,21 +163,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.dtype = 'fp32'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
-    """No dist strat forced v1 execution path."""
-    self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.num_gpus = 1
-    FLAGS.data_dir = self.data_dir
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpu(self):
     """Test keras based model with eager and distribution strategies."""
     self._setup()
@@ -261,17 +246,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_force_v1_path(self):
-    """Test 1 gpu using forced v1 execution path."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'one_device'
-    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
-    FLAGS.batch_size = 128
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_1_gpu(self):
     """Test 1 gpu graph."""
     self._setup()
@@ -316,33 +290,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
-    """No dist strat but forced v1 execution path."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 128
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
-    """Forced v1 execution path and forced eager."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 128
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    FLAGS.run_eagerly = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpu(self):
     """Test 2 gpu."""
     self._setup()
@@ -409,19 +356,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_cpu_no_dist_strat_force_v1_path(self):
-    """Test cpu without dist strat and force v1 in model.compile."""
-    self._setup()
-    FLAGS.num_gpus = 0
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_cpu_no_dist_strat_force_v1_path')
-    FLAGS.batch_size = 128
-    FLAGS.data_format = 'channels_last'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_cpu_no_dist_strat(self):
     """Test cpu graph mode without distribution strategies."""
     self._setup()
...
@@ -205,18 +205,6 @@ def run(flags_obj):
   with strategy_scope:
     optimizer = common.get_optimizer(lr_schedule)
     model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is on longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if flags_obj.force_v2_in_keras_compile is not None:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
-          metrics=(['sparse_categorical_accuracy']
-                   if flags_obj.report_accuracy_metrics else None),
-          run_eagerly=flags_obj.run_eagerly,
-          experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
-    else:
-      model.compile(
-          loss='sparse_categorical_crossentropy',
-          optimizer=optimizer,
+    model.compile(
+        loss='sparse_categorical_crossentropy',
+        optimizer=optimizer,
...
@@ -142,25 +142,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.early_stopping = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.early_stopping = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
@@ -174,13 +161,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()
 
-  def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
-    self._setup()
-    FLAGS.early_stopping = True
-    FLAGS.enable_xla = True
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_ctl_early_stop(self):
     self._setup()
     FLAGS.keras_use_ctl = True
@@ -233,14 +213,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()
 
-  def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
-    """1 GPU using compile/fit without dist_strat."""
-    self._setup()
-    FLAGS.train_epochs = 7
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat."""
     self._setup()
@@ -353,20 +325,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.epsilon = 1e-8
     self._run_and_report_benchmark_mlperf_like()
 
-  def benchmark_8_gpu_force_v1_path_mlperf_like(self):
-    """8 GPU using keras fit/compile v1 codepath."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.train_epochs = 17
-    FLAGS.batch_size = 1048576
-    FLAGS.eval_batch_size = 160000
-    FLAGS.learning_rate = 0.0045
-    FLAGS.beta1 = 0.25
-    FLAGS.beta2 = 0.5
-    FLAGS.epsilon = 1e-8
-    FLAGS.force_v2_in_keras_compile = False
-    self._run_and_report_benchmark_mlperf_like()
-
   def benchmark_8_gpu_ctl_mlperf_like(self):
     """8 GPU using CTL."""
     self._setup()
...
@@ -75,7 +75,6 @@ def define_transformer_flags():
       tf_gpu_thread_mode=True,
       datasets_num_private_threads=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True,
       fp16_implementation=True
   )
...
@@ -157,7 +157,6 @@ def define_ncf_flags():
       loss_scale=True,
       dynamic_loss_scale=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True
   )
   flags_core.define_device(tpu=True)
   flags_core.define_benchmark()
...
@@ -299,14 +299,6 @@ def run_ncf(_):
         num_train_steps,
         num_eval_steps,
         generate_input_online=generate_input_online)
-  else:
-    # TODO(b/138957587): Remove when force_v2_in_keras_compile is on longer
-    # a valid arg for this model. Also remove as a valid flag.
-    if FLAGS.force_v2_in_keras_compile is not None:
-      keras_model.compile(
-          optimizer=optimizer,
-          run_eagerly=FLAGS.run_eagerly,
-          experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
-    else:
-      keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
+  else:
+    keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
...
@@ -176,19 +176,6 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_ds_force_v2(self):
-    """Benchmark 1 gpu no ds with force_v2 in keras.compile."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.training_data = self.train_data
-    FLAGS.batch_size = 64
-    FLAGS.train_epochs = 43
-    FLAGS.model_dir = ''
-    FLAGS.force_v2_in_keras_compile = True
-    FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
-
   def benchmark_xla_1_gpu(self):
     """Benchmark 1 gpu w/xla."""
     self._setup()
@@ -297,15 +284,6 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_ds_force_v2(self):
-    """Benchmark 1 gpu no ds, and force v2."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.batch_size = 64
-    FLAGS.force_v2_in_keras_compile = True
-    FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
-
   def benchmark_1_gpu_no_ds_run_eagerly(self):
     """Benchmark 1 gpu."""
     self._setup()
...
@@ -59,8 +59,7 @@ def define_flags():
       max_train_steps=False,
       dtype=True,
       loss_scale=True,
-      enable_xla=True,
-      force_v2_in_keras_compile=True)
+      enable_xla=True)
   flags_core.set_defaults(train_epochs=43,
                           batch_size=64)
@@ -193,8 +192,7 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
       loss=tf.keras.losses.CategoricalCrossentropy(),
       metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
                tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
-      run_eagerly=flags_obj.run_eagerly,
-      experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
+      run_eagerly=flags_obj.run_eagerly)
   callbacks = []
   if checkpoint_dir:
...
@@ -64,7 +64,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
                        dynamic_loss_scale=False, fp16_implementation=False,
                        loss_scale=False,
                        tf_data_experimental_slack=False, enable_xla=False,
-                       force_v2_in_keras_compile=False,
                        training_dataset_cache=False):
   """Register flags for specifying performance tuning arguments.
@@ -91,9 +90,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
     tf_data_experimental_slack: Determines whether to enable tf.data's
       `experimental_slack` option.
     enable_xla: Determines if XLA (auto clustering) is turned on.
-    force_v2_in_keras_compile: Forces the use of run_distribued path even if not
-      using a `strategy`. This is not the same as
-      `tf.distribute.OneDeviceStrategy`
     training_dataset_cache: Whether to cache the training dataset on workers.
       Typically used to improve training performance when training data is in
       remote storage and can fit into worker memory.
@@ -290,11 +286,4 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
         name="enable_xla", default=False,
         help="Whether to enable XLA auto jit compilation")
 
-  if force_v2_in_keras_compile:
-    flags.DEFINE_boolean(
-        name="force_v2_in_keras_compile", default=None,
-        help="Forces the use of run_distribued path even if not"
-             "using a `strategy`. This is not the same as"
-             "`tf.distribute.OneDeviceStrategy`")
-
   return key_flags
@@ -213,7 +213,6 @@ def define_keras_flags(
       fp16_implementation=True,
      tf_data_experimental_slack=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True,
       training_dataset_cache=True)
   flags_core.define_image()
   flags_core.define_benchmark()
...
@@ -215,17 +215,7 @@ def run(flags_obj):
   elif flags_obj.pruning_method:
     raise NotImplementedError(
         'Only polynomial_decay is currently supported.')
-  # TODO(b/138957587): Remove when force_v2_in_keras_compile is on longer
-  # a valid arg for this model. Also remove as a valid flag.
-  if flags_obj.force_v2_in_keras_compile is not None:
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=optimizer,
-        metrics=(['sparse_categorical_accuracy']
-                 if flags_obj.report_accuracy_metrics else None),
-        run_eagerly=flags_obj.run_eagerly,
-        experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
-  else:
-    model.compile(
-        loss='sparse_categorical_crossentropy',
-        optimizer=optimizer,
+  model.compile(
+      loss='sparse_categorical_crossentropy',
+      optimizer=optimizer,
...