Unverified Commit 51e60bab authored by Ayushman Kumar, committed by GitHub

Merge pull request #3 from tensorflow/master

Updated
parents 7653185e 7d86c317
@@ -75,7 +75,6 @@ def define_transformer_flags():
tf_gpu_thread_mode=True,
datasets_num_private_threads=True,
enable_xla=True,
force_v2_in_keras_compile=True,
fp16_implementation=True
)
@@ -157,7 +157,6 @@ def define_ncf_flags():
loss_scale=True,
dynamic_loss_scale=True,
enable_xla=True,
force_v2_in_keras_compile=True
)
flags_core.define_device(tpu=True)
flags_core.define_benchmark()
@@ -300,15 +300,7 @@ def run_ncf(_):
num_eval_steps,
generate_input_online=generate_input_online)
else:
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if FLAGS.force_v2_in_keras_compile is not None:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
else:
keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
if not FLAGS.ml_perf:
# Create Tensorboard summary and checkpoint callbacks.
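For readers following the change, a self-contained sketch of the surviving compile path with a stand-in model (the real `keras_model` and `optimizer` come from the NCF code; everything below, including the loss, is illustrative):

```python
import tensorflow as tf

# Stand-in two-layer model; the real NCF model is built elsewhere.
keras_model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
optimizer = tf.keras.optimizers.Adam()

# The single remaining compile call: run_eagerly=True disables
# tf.function graph tracing, which aids debugging at a speed cost.
keras_model.compile(optimizer=optimizer,
                    loss="binary_crossentropy",
                    run_eagerly=False)
```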
@@ -176,19 +176,6 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_force_v2(self):
"""Benchmark 1 gpu no ds with force_v2 in keras.compile."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.force_v2_in_keras_compile = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu w/xla."""
self._setup()
@@ -297,15 +284,6 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_force_v2(self):
"""Benchmark 1 gpu no ds, and force v2."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_run_eagerly(self):
"""Benchmark 1 gpu."""
self._setup()
@@ -59,8 +59,7 @@ def define_flags():
max_train_steps=False,
dtype=True,
loss_scale=True,
enable_xla=True,
force_v2_in_keras_compile=True)
enable_xla=True)
flags_core.set_defaults(train_epochs=43,
batch_size=64)
@@ -193,8 +192,7 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
run_eagerly=flags_obj.run_eagerly)
callbacks = []
if checkpoint_dir:
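As a side note, a minimal sketch of the top-k recall metrics used in this compile call (the one-layer classifier is a stand-in; the real model is the Shakespeare LSTM):

```python
import tensorflow as tf

# Stand-in classifier for illustration only.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(10, activation="softmax")])

# With top_k set, Recall counts a sample as a true positive when the
# true class is among the k highest-scoring predictions.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.Recall(top_k=1, name="RecallAt1"),
             tf.keras.metrics.Recall(top_k=5, name="RecallAt5")])
```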
@@ -104,7 +104,8 @@ def minimize_using_explicit_allreduce(tape,
and model variables pairs as input, manipulate them, and returns a new
gradients and model variables pairs. The callback functions will be
invoked in the list order and before gradients are allreduced.
Default is no callbacks.
With mixed precision training, the pre_allreduce_callbacks will be
applied on scaled_gradients. Default is no callbacks.
post_allreduce_callbacks: A list of callback functions that takes
gradients and model variables pairs as input, manipulate them, and
returns a new gradients and model variables pairs. The callback
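To make the callback contract concrete, a minimal sketch of a pre-allreduce callback with the signature described above; the clip value is illustrative, and under mixed precision the incoming gradients are still loss-scaled:

```python
import tensorflow as tf

def clip_by_norm_callback(grads_and_vars):
  # Receives a list of (gradient, variable) pairs and must return a
  # new list of pairs; invoked before gradients are allreduced.
  return [(tf.clip_by_norm(grad, 1.0), var)
          for grad, var in grads_and_vars]
```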
@@ -64,7 +64,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
dynamic_loss_scale=False, fp16_implementation=False,
loss_scale=False,
tf_data_experimental_slack=False, enable_xla=False,
force_v2_in_keras_compile=False,
training_dataset_cache=False):
"""Register flags for specifying performance tuning arguments.
@@ -91,9 +90,6 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
tf_data_experimental_slack: Determines whether to enable tf.data's
`experimental_slack` option.
enable_xla: Determines if XLA (auto clustering) is turned on.
force_v2_in_keras_compile: Forces the use of the run_distributed path even
if not using a `strategy`. This is not the same as
`tf.distribute.OneDeviceStrategy`.
training_dataset_cache: Whether to cache the training dataset on workers.
Typically used to improve training performance when training data is in
remote storage and can fit into worker memory.
@@ -290,11 +286,4 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
name="enable_xla", default=False,
help="Whether to enable XLA auto jit compilation")
if force_v2_in_keras_compile:
flags.DEFINE_boolean(
name="force_v2_in_keras_compile", default=None,
help="Forces the use of run_distribued path even if not"
"using a `strategy`. This is not the same as"
"`tf.distribute.OneDeviceStrategy`")
return key_flags
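For readers unfamiliar with the flag machinery, a self-contained sketch of the absl.flags pattern this function builds on (the flag mirrors `enable_xla` above; the `main` body is illustrative):

```python
from absl import app
from absl import flags

FLAGS = flags.FLAGS

# Each boolean switch passed to define_performance gates a DEFINE_*
# call like this one; callers read the parsed value from FLAGS.
flags.DEFINE_boolean(
    name="enable_xla", default=False,
    help="Whether to enable XLA auto jit compilation")

def main(_):
  if FLAGS.enable_xla:
    print("XLA auto jit compilation requested")

if __name__ == "__main__":
  app.run(main)
```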
@@ -115,7 +115,6 @@ def get_filenames(is_training, data_dir):
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
@@ -127,7 +126,6 @@ def input_fn(is_training,
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
@@ -155,7 +153,6 @@ def input_fn(is_training,
batch_size=batch_size,
shuffle_buffer=NUM_IMAGES['train'],
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder
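One way to read the `num_epochs` removal: the input pipeline now yields each example once per pass, and epoch repetition is left to the training loop. A toy sketch of that division of labor, assuming Keras `model.fit` as the driver (all names and data below are stand-ins):

```python
import tensorflow as tf

# Toy dataset; note there is no .repeat() in the pipeline.
features = tf.random.uniform([32, 4])
labels = tf.random.uniform([32], maxval=2, dtype=tf.int32)
dataset = (tf.data.Dataset.from_tensor_slices((features, labels))
           .shuffle(32)
           .batch(8))

model = tf.keras.Sequential(
    [tf.keras.layers.Dense(2, activation="softmax")])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

# fit() re-iterates a finite dataset once per epoch, so the pipeline
# no longer needs to know the epoch count.
model.fit(dataset, epochs=3)
```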
@@ -213,7 +213,6 @@ def define_keras_flags(
fp16_implementation=True,
tf_data_experimental_slack=True,
enable_xla=True,
force_v2_in_keras_compile=True,
training_dataset_cache=True)
flags_core.define_image()
flags_core.define_benchmark()
@@ -67,7 +67,6 @@ def process_record_dataset(dataset,
batch_size,
shuffle_buffer,
parse_record_fn,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
drop_remainder=False,
@@ -83,7 +82,6 @@ def process_record_dataset(dataset,
time and use less memory.
parse_record_fn: A function that takes a raw record and returns the
corresponding (image, label) pair.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features.
datasets_num_private_threads: Number of threads for a private
threadpool created for all datasets computation.
@@ -276,7 +274,6 @@ def get_parse_record_fn(use_keras_image_data_format=False):
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
@@ -291,7 +288,6 @@ def input_fn(is_training,
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
@@ -344,7 +340,6 @@ def input_fn(is_training,
batch_size=batch_size,
shuffle_buffer=_SHUFFLE_BUFFER,
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder,
@@ -215,23 +215,13 @@ def run(flags_obj):
elif flags_obj.pruning_method:
raise NotImplementedError(
'Only polynomial_decay is currently supported.')
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
train_epochs = flags_obj.train_epochs