Unverified commit 1e2ceffd authored by Ayushman Kumar, committed by GitHub

Merge pull request #4 from tensorflow/master

Updating 
parents 51e60bab c7adbbe4
@@ -301,5 +301,4 @@ def main(unused_argv):
 if __name__ == "__main__":
-  assert tf.version.VERSION.startswith('2.')
   app.run(main)
@@ -49,5 +49,4 @@ class PositionalEmbeddingLayerTest(tf.test.TestCase):
     self.assertAllClose(pos_emb, target)
 if __name__ == "__main__":
-  assert tf.version.VERSION.startswith('2.')
   tf.test.main()
@@ -28,7 +28,7 @@ import sys
 # pylint: disable=g-bad-import-order
 from absl import app as absl_app # pylint: disable=unused-import
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 # pylint: enable=g-bad-import-order
 # For open source environment, add grandparent directory for import
@@ -98,7 +98,7 @@ def model_fn(features, labels, mode, params):
         'class_ids': tf.argmax(logits, axis=1),
         'probabilities': tf.nn.softmax(logits),
     }
-    return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(mode, predictions=predictions)
+    return tf.estimator.tpu.TPUEstimatorSpec(mode, predictions=predictions)
   logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN))
   loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
@@ -111,14 +111,14 @@ def model_fn(features, labels, mode, params):
         decay_rate=0.96)
     optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
     if FLAGS.use_tpu:
-      optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
-    return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
+      optimizer = tf.tpu.CrossShardOptimizer(optimizer)
+    return tf.estimator.tpu.TPUEstimatorSpec(
         mode=mode,
         loss=loss,
         train_op=optimizer.minimize(loss, tf.train.get_global_step()))
   if mode == tf.estimator.ModeKeys.EVAL:
-    return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
+    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode, loss=loss, eval_metrics=(metric_fn, [labels, logits]))
@@ -128,7 +128,7 @@ def train_input_fn(params):
   data_dir = params["data_dir"]
   # Retrieves the batch size for the current shard. The # of shards is
   # computed according to the input pipeline deployment. See
-  # `tf.compat.v1.estimator.tpu.RunConfig` for details.
+  # `tf.estimator.tpu.RunConfig` for details.
   ds = dataset.train(data_dir).cache().repeat().shuffle(
       buffer_size=50000).batch(batch_size, drop_remainder=True)
   return ds
@@ -159,16 +159,15 @@ def main(argv):
       project=FLAGS.gcp_project
   )
-  run_config = tf.compat.v1.estimator.tpu.RunConfig(
+  run_config = tf.estimator.tpu.RunConfig(
       cluster=tpu_cluster_resolver,
       model_dir=FLAGS.model_dir,
       session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=True),
-      tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
-          FLAGS.iterations, FLAGS.num_shards),
+      tpu_config=tf.estimator.tpu.TPUConfig(FLAGS.iterations, FLAGS.num_shards),
   )
-  estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
+  estimator = tf.estimator.tpu.TPUEstimator(
       model_fn=model_fn,
       use_tpu=FLAGS.use_tpu,
       train_batch_size=FLAGS.batch_size,
@@ -199,4 +198,5 @@ def main(argv):
 if __name__ == "__main__":
+  tf.disable_v2_behavior()
   absl_app.run(main)
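The hunks above move this TPU Estimator script onto the TF1-style API under TF2: `tf` now comes from `tensorflow.compat.v1`, so the explicit `tf.compat.v1.` prefixes on the TPU Estimator symbols become redundant, and `tf.disable_v2_behavior()` is called before `absl_app.run(main)`. Below is a minimal sketch of the same pattern; `toy_model_fn` and all literal values are illustrative placeholders, not code from this commit.

```python
# Sketch of the compat.v1 TPUEstimator pattern used above; `toy_model_fn`
# and the literal values are illustrative placeholders, not from the diff.
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # keep TF1 graph/session semantics under TF2


def toy_model_fn(features, labels, mode, params):
  """Toy model_fn; a real one would build a proper network from `features`."""
  del params  # unused in this sketch
  logits = tf.layers.dense(tf.layers.flatten(features), 10)
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  # `tf` is already compat.v1, so no `tf.compat.v1.` prefix is needed here.
  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=optimizer.minimize(loss, tf.train.get_global_step()))


run_config = tf.estimator.tpu.RunConfig(
    model_dir='/tmp/toy_model',
    session_config=tf.ConfigProto(allow_soft_placement=True),
    tpu_config=tf.estimator.tpu.TPUConfig(iterations_per_loop=100,
                                          num_shards=8))
estimator = tf.estimator.tpu.TPUEstimator(
    model_fn=toy_model_fn,
    config=run_config,
    use_tpu=False,  # falls back to CPU/GPU when no TPU is attached
    train_batch_size=64)
```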
@@ -22,6 +22,7 @@ import os
from absl import app as absl_app
from absl import flags
from six.moves import range
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.r1.resnet import resnet_model
......
@@ -22,6 +22,7 @@ import os
from absl import app as absl_app
from absl import flags
from six.moves import range
import tensorflow as tf
from official.r1.resnet import imagenet_preprocessing
@@ -307,7 +308,7 @@ def _get_block_sizes(resnet_size):
   except KeyError:
     err = ('Could not find layers for selected Resnet size.\n'
            'Size received: {}; sizes allowed: {}.'.format(
-               resnet_size, choices.keys()))
+               resnet_size, list(choices.keys())))
     raise ValueError(err)
......
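The `list(choices.keys())` change is a small Python 3 readability fix: `dict.keys()` returns a view object whose string form is `dict_keys([...])`, which looks odd inside the error message. A quick illustration, using made-up sizes rather than the real ResNet table:

```python
# Why the error message wraps dict.keys() in list() under Python 3.
choices = {18: None, 34: None, 50: None}  # made-up sizes

# Formats as "dict_keys([18, 34, 50])", which reads poorly in an error message.
print('Size received: {}; sizes allowed: {}.'.format(99, choices.keys()))

# Wrapping in list() gives the cleaner "[18, 34, 50]".
print('Size received: {}; sizes allowed: {}.'.format(99, list(choices.keys())))
```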
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 class Attention(tf.layers.Layer):
......
@@ -54,7 +54,7 @@ from __future__ import print_function
 import math
 import os
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from official.utils.misc import model_helpers
......
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import tensorflow as tf # pylint: disable=g-bad-import-order
+import tensorflow.compat.v1 as tf # pylint: disable=g-bad-import-order
 from official.r1.utils import tpu as tpu_utils
......
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 class FeedFowardNetwork(tf.layers.Layer):
......
@@ -20,7 +20,7 @@ from __future__ import print_function
 import math
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 _TRAIN, _EVAL = tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
......
@@ -14,7 +14,7 @@
 # ==============================================================================
 """Test Transformer's schedule manager."""
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from official.r1.transformer import schedule
......
@@ -22,7 +22,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from official.nlp.transformer import beam_search_v1 as beam_search
 from official.nlp.transformer import model_utils
......
@@ -29,7 +29,7 @@ import tempfile
 from six.moves import xrange # pylint: disable=redefined-builtin
 from absl import app as absl_app
 from absl import flags
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 # pylint: enable=g-bad-import-order
 from official.nlp.transformer import model_params
......
@@ -23,7 +23,7 @@ import os
 # pylint: disable=g-bad-import-order
 from absl import app as absl_app
 from absl import flags
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 # pylint: enable=g-bad-import-order
 from official.nlp.transformer.utils import tokenizer
......
@@ -94,13 +94,15 @@ class Controller(object):
     # TODO(rxsang): Support training until exhaustion by passing
     # `train_steps=-1`. Currently it cannot be supported with a host training
     # loop because break statements are not supported with distributed dataset.
-    if train_fn is not None and train_steps is None:
-      raise ValueError("`train_steps` is required when `train_fn` is provided.")
-    if train_fn is not None and steps_per_loop is None:
-      raise ValueError("`steps_per_loop` is required when `train_fn is "
-                       "provided.")
-    if not isinstance(steps_per_loop, int) or steps_per_loop < 1:
-      raise ValueError("`steps_per_loop` should be a positive integer")
+    if train_fn is not None:
+      if train_steps is None:
+        raise ValueError("`train_steps` is required when `train_fn` is "
+                         "provided.")
+      if steps_per_loop is None:
+        raise ValueError("`steps_per_loop` is required when `train_fn is "
+                         "provided.")
+      if not isinstance(steps_per_loop, int) or steps_per_loop < 1:
+        raise ValueError("`steps_per_loop` should be a positive integer")
     if summary_interval is not None and summary_interval <= 0:
       raise ValueError("`summary_interval` should be larger than 0")
......
@@ -53,13 +53,17 @@ def create_loop_fn(step_fn):
     """
     try:
       step = 0
-      while (num_steps == -1 or step < num_steps):
-        outputs = step_fn(iterator)
-        if reduce_fn is not None:
-          state = reduce_fn(state, outputs)
-        step += 1
-      return state
+      # To make sure the OutOfRangeError exception can be handled well with
+      # async remote eager, we need to wrap the loop body in a `async_scope`.
+      with tf.experimental.async_scope():
+        while (num_steps == -1 or step < num_steps):
+          outputs = step_fn(iterator)
+          if reduce_fn is not None:
+            state = reduce_fn(state, outputs)
+          step += 1
+        return state
     except (StopIteration, tf.errors.OutOfRangeError):
+      tf.experimental.async_clear_error()
       return state
   return loop_fn
......
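Wrapping the host loop in `tf.experimental.async_scope()` makes the `StopIteration`/`OutOfRangeError` raised by an exhausted iterator surface inside the `try` block even when ops are dispatched asynchronously (async remote eager); `tf.experimental.async_clear_error()` then clears the pending error so later work is unaffected. A stand-alone sketch of the same pattern, with a placeholder dataset and step function rather than the library's own `step_fn`:

```python
# Stand-alone sketch of the async-aware loop pattern above; the dataset and
# the step function are placeholders, not code from the diff.
import tensorflow as tf


def run_steps(iterator, num_steps, step_fn):
  """Runs `step_fn` until `num_steps` is reached or the iterator is exhausted."""
  state = []
  step = 0
  try:
    # async_scope lets errors from asynchronously dispatched ops be raised
    # here instead of at some later, unrelated op.
    with tf.experimental.async_scope():
      while num_steps == -1 or step < num_steps:
        state.append(step_fn(next(iterator)))
        step += 1
      return state
  except (StopIteration, tf.errors.OutOfRangeError):
    # Clear the pending async error so subsequent ops are not poisoned by it.
    tf.experimental.async_clear_error()
    return state


dataset = tf.data.Dataset.range(5)
result = run_steps(iter(dataset), num_steps=-1, step_fn=lambda x: int(x) * 2)
print(result)  # [0, 2, 4, 6, 8]
```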
@@ -117,8 +117,9 @@ class TimeHistory(tf.keras.callbacks.Callback):
       self.timestamp_log.append(BatchTimestamp(self.global_steps, now))
       logging.info(
-          'TimeHistory: %.2f examples/second between steps %d and %d',
-          examples_per_second, self.last_log_step, self.global_steps)
+          'TimeHistory: %.2f seconds, %.2f examples/second between steps %d '
+          'and %d', elapsed_time, examples_per_second, self.last_log_step,
+          self.global_steps)
       if self.summary_writer:
         with self.summary_writer.as_default():
......
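The updated log line reports the elapsed wall time alongside the throughput. For context, here is a minimal sketch of a TimeHistory-style callback that produces such a message; the class name, logging interval, and use of `print` instead of the `logging.info` call above are illustrative choices, not the library's implementation.

```python
# Minimal sketch of a throughput-logging Keras callback in the spirit of
# TimeHistory; names and the logging interval are illustrative.
import time
import tensorflow as tf


class ThroughputLogger(tf.keras.callbacks.Callback):

  def __init__(self, batch_size, log_steps=100):
    super().__init__()
    self.batch_size = batch_size
    self.log_steps = log_steps
    self.global_steps = 0
    self.last_log_step = 0
    self.start_time = None

  def on_batch_begin(self, batch, logs=None):
    # Start timing at the first batch after each logged interval.
    if self.start_time is None:
      self.start_time = time.time()

  def on_batch_end(self, batch, logs=None):
    self.global_steps += 1
    if self.global_steps - self.last_log_step >= self.log_steps:
      elapsed_time = time.time() - self.start_time
      steps = self.global_steps - self.last_log_step
      examples_per_second = steps * self.batch_size / elapsed_time
      # Report both wall time and throughput, as the updated message does.
      print('%.2f seconds, %.2f examples/second between steps %d and %d' %
            (elapsed_time, examples_per_second, self.last_log_step,
             self.global_steps))
      self.last_log_step = self.global_steps
      self.start_time = None
```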
@@ -465,7 +465,9 @@ class RetinanetBoxLoss(object):
     # for instances, the regression targets of 512x512 input with 6 anchors on
     # P3-P7 pyramid is about [0.1, 0.1, 0.2, 0.2].
     normalizer = num_positives * 4.0
-    mask = tf.not_equal(box_targets, 0.0)
+    mask = tf.cast(tf.not_equal(box_targets, 0.0), dtype=tf.float32)
+    box_targets = tf.expand_dims(box_targets, axis=-1)
+    box_outputs = tf.expand_dims(box_outputs, axis=-1)
     box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask)
     box_loss /= normalizer
     return box_loss
......
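The box-loss hunk casts the boolean mask to `float32` and adds a trailing axis to the targets and outputs. Assuming `self._huber_loss` is a `tf.keras.losses.Huber` instance, the loss is averaged over the last axis, so the extra size-1 axis keeps a per-coordinate loss that the float mask can weight element-wise through `sample_weight`. A self-contained sketch under that assumption; the delta, shapes, and values here are made up:

```python
# Sketch of the masked Huber box loss, assuming a tf.keras.losses.Huber
# instance with SUM reduction; delta, shapes, and values are made up.
import tensorflow as tf

huber = tf.keras.losses.Huber(delta=0.1,
                              reduction=tf.keras.losses.Reduction.SUM)

box_targets = tf.constant([[0.0, 0.2, -0.1, 0.0]])  # zeros mark ignored coords
box_outputs = tf.constant([[0.3, 0.1, -0.2, 0.4]])
num_positives = tf.constant(1.0)

normalizer = num_positives * 4.0
# A boolean tensor cannot be used directly as a sample weight; cast to float32.
mask = tf.cast(tf.not_equal(box_targets, 0.0), dtype=tf.float32)
# Keras losses reduce over the last axis, so add a trailing axis of size 1
# to keep a per-coordinate loss that the mask can weight element-wise.
box_targets = tf.expand_dims(box_targets, axis=-1)
box_outputs = tf.expand_dims(box_outputs, axis=-1)
box_loss = huber(box_targets, box_outputs, sample_weight=mask) / normalizer
print(float(box_loss))
```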
@@ -275,12 +275,6 @@ def define_keras_flags(
       help='Whether to build a tf.while_loop inside the training loop on the '
       'host. Setting it to True is critical to have peak performance on '
      'TPU.')
-  flags.DEFINE_boolean(
-      name='use_tf_keras_layers', default=False,
-      help='Whether to use tf.keras.layers instead of tf.python.keras.layers.'
-      'It only changes imagenet resnet model layers for now. This flag is '
-      'a temporal flag during transition to tf.keras.layers. Do not use this '
-      'flag for external usage. this will be removed shortly.')
   if model:
     flags.DEFINE_string('model', 'resnet50_v1.5',
......