Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

47bc1813 · syiming · d8611151 · b035a227 · d8611151 · d8611151
Commit 47bc1813 authored Jul 01, 2020 by syiming
20 changed files
--- a/official/r1/transformer/ffn_layer.py
+++ b/official/r1/transformer/ffn_layer.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Implementation of fully connected network."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow.compat.v1 as tf
-
-
-class FeedFowardNetwork(tf.layers.Layer):
-  """Feedforward sublayer: a ReLU dense layer followed by a linear dense layer."""
-
-  def __init__(self, hidden_size, filter_size, relu_dropout, train, allow_pad):
-    """Stores the configuration and creates the two dense layers.
-
-    Args:
-      hidden_size: number of units of the output dense layer; also the last
-        dimension x is reshaped to when padding is removed.
-      filter_size: number of units of the inner ReLU dense layer.
-      relu_dropout: dropout rate after the ReLU layer; call() passes
-        1.0 - relu_dropout to tf.nn.dropout.
-      train: if truthy, call() applies dropout.
-      allow_pad: if falsy, the padding argument of call() is ignored.
-    """
-    super(FeedFowardNetwork, self).__init__()
-    self.hidden_size = hidden_size
-    self.filter_size = filter_size
-    self.relu_dropout = relu_dropout
-    self.train = train
-    self.allow_pad = allow_pad
-
-    self.filter_dense_layer = tf.layers.Dense(
-        units=filter_size,
-        use_bias=True,
-        activation=tf.nn.relu,
-        name="filter_layer")
-    self.output_dense_layer = tf.layers.Dense(
-        units=hidden_size, use_bias=True, name="output_layer")
-
-  def call(self, x, padding=None):
-    """Applies the feedforward network to x.
-
-    Args:
-      x: tensor with shape [batch_size, length, hidden_size]
-      padding: (optional) tensor with shape [batch_size, length]. When given
-        and self.allow_pad is set, positions whose padding value is >= 1e-9
-        are dropped before the dense layers run and are put back (as zeros
-        written by tf.scatter_nd) at the same locations in the output.
-
-    Returns:
-      Output of the feedforward network.
-      tensor with shape [batch_size, length, hidden_size]
-    """
-    if not self.allow_pad:
-      padding = None
-    strip_padding = padding is not None
-
-    # Dynamic sizes are captured before x is flattened so that the original
-    # layout can be restored afterwards.
-    num_sequences = tf.shape(x)[0]
-    sequence_length = tf.shape(x)[1]
-
-    if strip_padding:
-      with tf.name_scope("remove_padding"):
-        # One padding flag per position: [batch_size*length].
-        flat_padding = tf.reshape(padding, [-1])
-
-        kept_positions = tf.to_int32(tf.where(flat_padding < 1e-9))
-
-        # Flatten to [batch_size*length, hidden_size] and keep only the
-        # non-padding rows.
-        flat_x = tf.reshape(x, [-1, self.hidden_size])
-        x = tf.gather_nd(flat_x, indices=kept_positions)
-
-        # Restore the static last dimension, then add a leading axis so the
-        # dense layers still receive a 3-D tensor.
-        x.set_shape([None, self.hidden_size])
-        x = tf.expand_dims(x, axis=0)
-
-    hidden = self.filter_dense_layer(x)
-    if self.train:
-      hidden = tf.nn.dropout(hidden, 1.0 - self.relu_dropout)
-    result = self.output_dense_layer(hidden)
-
-    if not strip_padding:
-      return result
-
-    with tf.name_scope("re_add_padding"):
-      compact = tf.squeeze(result, axis=0)
-      scattered = tf.scatter_nd(
-          indices=kept_positions,
-          updates=compact,
-          shape=[num_sequences * sequence_length, self.hidden_size]
-      )
-      return tf.reshape(
-          scattered, [num_sequences, sequence_length, self.hidden_size])
--- a/official/r1/transformer/schedule.py
+++ b/official/r1/transformer/schedule.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Abstract training on a step or epoch basis."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import tensorflow.compat.v1 as tf
-
-
-# Short aliases for the estimator mode keys used in this module.
-_TRAIN, _EVAL = tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
-
-
-# Number of examples per estimator mode. Read by Manager.repeat_dataset and
-# Manager.epochs_to_steps.
-NUM_EXAMPLES = {
-    tf.estimator.ModeKeys.TRAIN: 4572160,
-    # # Examples that are too long are filtered out, thus the total is less
-    # # than the total number of lines.
-    # 2399123 +  # news-commentary-v12.de-en
-    # 1920209 +  # commoncrawl.de-en
-    # 270769,    # europarl-v7.de-en
-    tf.estimator.ModeKeys.EVAL: 3000,  # newstest2013
-}
-
-
-class Manager(object):
-  """Container for convenience functions to abstract step or epoch basis.
-
-  Transformer allows users to specify an epoch basis (generally recommended for
-  full training) or a number of steps basis (convenient since epochs are rather
-  large). TPUs furthermore require a step basis; however epochs are the norm in
-  the machine learning community and it is desirable to allow users to specify
-  epochs even when running with TPUs, which requires behind-the-scenes
-  conversions.
-
-  This container simply groups what are largely mundane checks and conversions
-  rather than interspersing them throughout the run loop code.
-  """
-
-  def __init__(self, train_steps, steps_between_evals, train_epochs,
-               epochs_between_evals, default_train_epochs, batch_size,
-               max_length, use_tpu=False, num_tpu_shards=8):
-    """Validates the schedule flags and derives the train/eval iteration plan.
-
-    Args:
-      train_steps: total number of training steps, or a falsy value to train
-        on an epoch basis.
-      steps_between_evals: training steps per train/eval iteration. Only read
-        when train_steps is set.
-      train_epochs: total number of training epochs, or a falsy value.
-      epochs_between_evals: training epochs per train/eval iteration. Only
-        read when train_steps is not set.
-      default_train_epochs: epoch count used when neither train_steps nor
-        train_epochs is set.
-      batch_size: batch size. epochs_to_steps divides a token count
-        (examples * max_length) by this value.
-      max_length: maximum sequence length.
-      use_tpu: whether the TPU-specific step conversions are enabled.
-      num_tpu_shards: number of TPU shards; batch_size // max_length must be
-        a multiple of it when use_tpu is set.
-
-    Raises:
-      ValueError: if both train_steps and train_epochs are set.
-    """
-    if train_steps and train_epochs:
-      raise ValueError("Both train_steps and train_epochs were defined.")
-
-    # Determine training schedule based on flags.
-    if train_steps:
-      self.train_eval_iterations = train_steps // steps_between_evals
-      self._single_iteration_train_steps = steps_between_evals
-      self._single_iteration_train_epochs = None
-    else:
-      train_epochs = train_epochs or default_train_epochs
-      self.train_eval_iterations = train_epochs // epochs_between_evals
-      self._single_iteration_train_steps = None
-      self._single_iteration_train_epochs = epochs_between_evals
-
-    self.max_length = max_length
-    self.batch_size = batch_size
-    self.use_tpu = use_tpu
-    self.num_tpu_shards = num_tpu_shards
-
-    if self.use_tpu:
-      # Sequences per batch must split evenly across the TPU shards.
-      assert (self.batch_size // self.max_length) % self.num_tpu_shards == 0, (
-          "batch_size // max_length must be divisible by num_tpu_shards")
-
-  @property
-  def single_iteration_train_steps(self):
-    """Training steps per iteration.
-
-    Returns the configured step count when training on a step basis. On an
-    epoch basis this is None without TPU, and the epoch count converted to
-    steps with TPU.
-    """
-    if self._single_iteration_train_steps or not self.use_tpu:
-      return self._single_iteration_train_steps
-
-    return self.epochs_to_steps(
-        num_epochs=self._single_iteration_train_epochs, mode=_TRAIN)
-
-  @property
-  def single_iteration_eval_steps(self):
-    """Eval steps per iteration: None without TPU, one eval epoch in steps with TPU."""
-    if not self.use_tpu:
-      return None
-
-    return self.epochs_to_steps(num_epochs=1, mode=_EVAL)
-
-  @property
-  def train_increment_str(self):
-    """Human-readable description of how much is trained per iteration."""
-    if self._single_iteration_train_steps:
-      return "{} steps.".format(self._single_iteration_train_steps)
-
-    if not self.use_tpu:
-      return "{} epochs.".format(self._single_iteration_train_epochs)
-
-    return "~{} epochs. ({} steps)".format(
-        self._single_iteration_train_epochs,
-        self.single_iteration_train_steps)
-
-  @property
-  def repeat_dataset(self):
-    """Number of dataset repeats per iteration.
-
-    On a step basis this is None unless the step count exceeds
-    NUM_EXAMPLES[_TRAIN], in which case it is the ceiling of their ratio.
-    On an epoch basis it is the number of epochs between evals.
-    """
-    if (self._single_iteration_train_epochs is None and
-        self._single_iteration_train_steps > NUM_EXAMPLES[_TRAIN]):
-      return math.ceil(self._single_iteration_train_steps /
-                       NUM_EXAMPLES[_TRAIN])
-    return self._single_iteration_train_epochs
-
-  def epochs_to_steps(self, num_epochs, mode):
-    """Converts a number of epochs to a number of training steps.
-
-    TPU only: This function assumes that static_batch is True.
-
-    TPU can not tolerate an OutOfRange error from a dataset. As a result the
-    number of examples to be processed must be known ahead of time. TPUs also
-    do not allow partial batches, so this function rounds down.
-
-    Args:
-      num_epochs: An integer of the number of epochs to convert to steps.
-      mode: The estimator ModeKey of the computation
-
-    Returns:
-      An integer of the number of equivalent steps rounded down.
-    """
-    assert self.use_tpu, "epochs_to_steps should only be reached when using TPU"
-    total_num_tokens = NUM_EXAMPLES[mode] * self.max_length * num_epochs
-    return total_num_tokens // self.batch_size
--- a/official/r1/transformer/schedule_test.py
+++ b/official/r1/transformer/schedule_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test Transformer's schedule manager."""
-
-import tensorflow.compat.v1 as tf
-
-from official.r1.transformer import schedule
-
-
-class ScheduleBaseTester(tf.test.TestCase):
-  """Checks schedule.Manager on step and epoch bases, with and without TPU."""
-
-  def _build(self, **overrides):
-    """Creates a Manager with batch_size=2048 and max_length=256.
-
-    Every schedule argument defaults to None; keyword overrides replace the
-    defaults or add extra arguments such as use_tpu.
-    """
-    kwargs = dict(
-        train_steps=None, steps_between_evals=None, train_epochs=None,
-        epochs_between_evals=None, default_train_epochs=None, batch_size=2048,
-        max_length=256)
-    kwargs.update(overrides)
-    return schedule.Manager(**kwargs)
-
-  def test_mutual_exclusivity(self):
-    # Setting both a step count and an epoch count must be rejected.
-    with self.assertRaises(ValueError):
-      self._build(
-          train_steps=100, steps_between_evals=100, train_epochs=2,
-          epochs_between_evals=1)
-
-  def test_step_basis(self):
-    step_manager = self._build(train_steps=1000, steps_between_evals=100)
-
-    self.assertEqual(step_manager.single_iteration_train_steps, 100)
-
-    # Evaluation uses the full set
-    self.assertIsNone(step_manager.single_iteration_eval_steps)
-
-    self.assertIsNone(step_manager.repeat_dataset)
-
-  def test_epoch_basis(self):
-    epoch_manager = self._build(train_epochs=10, epochs_between_evals=2)
-
-    # For non-TPU, estimator relies on dataset exhaustion
-    self.assertIsNone(epoch_manager.single_iteration_train_steps)
-    self.assertIsNone(epoch_manager.single_iteration_eval_steps)
-
-    self.assertEqual(epoch_manager.repeat_dataset, 2)
-
-  def test_step_basis_tpu(self):
-    tpu_manager = self._build(
-        train_steps=1000, steps_between_evals=100, use_tpu=True)
-
-    self.assertEqual(tpu_manager.single_iteration_train_steps, 100)
-    # num_eval_examples / (batch_size / max_length) == 3000 / (2048 / 256)
-    self.assertEqual(tpu_manager.single_iteration_eval_steps, 375)
-    self.assertIsNone(tpu_manager.repeat_dataset)
-
-  def test_epoch_basis_tpu(self):
-    tpu_manager = self._build(
-        train_epochs=10, epochs_between_evals=2, use_tpu=True)
-
-    # Two epochs of training examples divided by sequences per batch.
-    num_train_examples = schedule.NUM_EXAMPLES[tf.estimator.ModeKeys.TRAIN]
-    expected_train_steps = num_train_examples * 2 // (2048 / 256)
-    self.assertEqual(
-        tpu_manager.single_iteration_train_steps, expected_train_steps)
-
-    # num_eval_examples / (batch_size / max_length) == 3000 / (2048 / 256)
-    self.assertEqual(tpu_manager.single_iteration_eval_steps, 375)
-
-    self.assertEqual(tpu_manager.repeat_dataset, 2)
-
-
-# Run the test cases through TensorFlow's test runner when executed directly.
-if __name__ == "__main__":
-  tf.test.main()
--- a/official/r1/transformer/transformer.py
+++ b/official/r1/transformer/transformer.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Defines the Transformer model, and its encoder and decoder stacks.
-
-Model paper: https://arxiv.org/pdf/1706.03762.pdf
-Transformer model code source: https://github.com/tensorflow/tensor2tensor
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow.compat.v1 as tf
-
-from official.nlp.transformer import beam_search_v1 as beam_search
-from official.nlp.transformer import model_utils
-from official.nlp.transformer.utils.tokenizer import EOS_ID
-from official.r1.transformer import attention_layer
-from official.r1.transformer import embedding_layer
-from official.r1.transformer import ffn_layer
-
-# Large negative float constant.
-# NOTE(review): not referenced in the visible part of this module; presumably
-# a stand-in for negative infinity -- confirm before relying on or removing it.
-_NEG_INF = -1e9
-
-
-class Transformer(object):
-  """Transformer model for sequence to sequence data.
-
-  Implemented as described in: https://arxiv.org/pdf/1706.03762.pdf
-
-  The Transformer model consists of an encoder and decoder. The input is an int
-  sequence (or a batch of sequences). The encoder produces a continuous
-  representation, and the decoder uses the encoder output to generate
-  probabilities for the output sequence.
-  """
-
-  def __init__(self, params, train):
-    """Initialize layers to build Transformer model.
-
-    Args:
-      params: hyperparameter object defining layer sizes, dropout values, etc.
-      train: boolean indicating whether the model is in training mode. Used to
-        determine if dropout layers should be added.
-    """
-    self.train = train
-    self.params = params
-
-    # A single layer object is used both to embed input/target ids and, via
-    # its linear() method, to produce the output logits.
-    self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
-        params["vocab_size"], params["hidden_size"],
-        method="matmul" if params["tpu"] else "gather")
-    self.encoder_stack = EncoderStack(params, train)
-    self.decoder_stack = DecoderStack(params, train)
-
-  def __call__(self, inputs, targets=None):
-    """Calculate target logits or inferred target sequences.
-
-    Args:
-      inputs: int tensor with shape [batch_size, input_length].
-      targets: None or int tensor with shape [batch_size, target_length].
-
-    Returns:
-      If targets is defined, then return logits for each word in the target
-      sequence. float tensor with shape [batch_size, target_length, vocab_size]
-      If target is none, then generate output sequence one token at a time.
-        returns a dictionary {
-          outputs: [batch_size, decoded length]
-          scores: [batch_size, float]}
-    """
-    # Variance scaling is used here because it seems to work in many problems.
-    # Other reasonable initializers may also work just as well.
-    initializer = tf.variance_scaling_initializer(
-        self.params["initializer_gain"], mode="fan_avg", distribution="uniform")
-    with tf.variable_scope("Transformer", initializer=initializer):
-      # Calculate attention bias for encoder self-attention and decoder
-      # multi-headed attention layers.
-      attention_bias = model_utils.get_padding_bias(inputs)
-
-      # Run the inputs through the encoder layer to map the symbol
-      # representations to continuous representations.
-      encoder_outputs = self.encode(inputs, attention_bias)
-
-      # Generate output sequence if targets is None, or return logits if target
-      # sequence is known.
-      if targets is None:
-        return self.predict(encoder_outputs, attention_bias)
-      else:
-        logits = self.decode(targets, encoder_outputs, attention_bias)
-        return logits
-
-  def encode(self, inputs, attention_bias):
-    """Generate continuous representation for inputs.
-
-    Args:
-      inputs: int tensor with shape [batch_size, input_length].
-      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
-
-    Returns:
-      float tensor with shape [batch_size, input_length, hidden_size]
-    """
-    with tf.name_scope("encode"):
-      # Prepare inputs to the layer stack by adding positional encodings and
-      # applying dropout.
-      embedded_inputs = self.embedding_softmax_layer(inputs)
-      # Padding information is forwarded to the encoder stack, which passes it
-      # to each feedforward sublayer.
-      inputs_padding = model_utils.get_padding(inputs)
-
-      with tf.name_scope("add_pos_encoding"):
-        length = tf.shape(embedded_inputs)[1]
-        pos_encoding = model_utils.get_position_encoding(
-            length, self.params["hidden_size"])
-        encoder_inputs = embedded_inputs + pos_encoding
-
-      if self.train:
-        # The second argument to tf.nn.dropout is 1 - dropout rate.
-        encoder_inputs = tf.nn.dropout(
-            encoder_inputs, 1 - self.params["layer_postprocess_dropout"])
-
-      return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
-
-  def decode(self, targets, encoder_outputs, attention_bias):
-    """Generate logits for each value in the target sequence.
-
-    Args:
-      targets: target values for the output sequence.
-        int tensor with shape [batch_size, target_length]
-      encoder_outputs: continuous representation of input sequence.
-        float tensor with shape [batch_size, input_length, hidden_size]
-      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
-
-    Returns:
-      float32 tensor with shape [batch_size, target_length, vocab_size]
-    """
-    with tf.name_scope("decode"):
-      # Prepare inputs to decoder layers by shifting targets, adding positional
-      # encoding and applying dropout.
-      decoder_inputs = self.embedding_softmax_layer(targets)
-      with tf.name_scope("shift_targets"):
-        # Shift targets to the right, and remove the last element
-        decoder_inputs = tf.pad(
-            decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
-      with tf.name_scope("add_pos_encoding"):
-        length = tf.shape(decoder_inputs)[1]
-        decoder_inputs += model_utils.get_position_encoding(
-            length, self.params["hidden_size"])
-      if self.train:
-        decoder_inputs = tf.nn.dropout(
-            decoder_inputs, 1 - self.params["layer_postprocess_dropout"])
-
-      # Run values
-      decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
-          length)
-      outputs = self.decoder_stack(
-          decoder_inputs, encoder_outputs, decoder_self_attention_bias,
-          attention_bias)
-      # Project decoder outputs to logits with the shared embedding layer.
-      logits = self.embedding_softmax_layer.linear(outputs)
-      return logits
-
-  def _get_symbols_to_logits_fn(self, max_decode_length):
-    """Returns a decoding function that calculates logits of the next tokens.
-
-    Args:
-      max_decode_length: maximum number of decoding steps. The position
-        encoding is built for max_decode_length + 1 positions and the decoder
-        self-attention bias for max_decode_length positions.
-
-    Returns:
-      A function (ids, i, cache) -> (logits, cache) to pass to beam search.
-    """
-
-    # Both tensors are built once here and sliced per step inside the closure.
-    timing_signal = model_utils.get_position_encoding(
-        max_decode_length + 1, self.params["hidden_size"])
-    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
-        max_decode_length)
-
-    def symbols_to_logits_fn(ids, i, cache):
-      """Generate logits for next potential IDs.
-
-      Args:
-        ids: Current decoded sequences.
-          int tensor with shape [batch_size * beam_size, i + 1]
-        i: Loop index
-        cache: dictionary of values storing the encoder output, encoder-decoder
-          attention bias, and previous decoder attention values.
-
-      Returns:
-        Tuple of
-          (logits with shape [batch_size * beam_size, vocab_size],
-           updated cache values)
-      """
-      # Set decoder input to the last generated IDs
-      decoder_input = ids[:, -1:]
-
-      # Preprocess decoder input by getting embeddings and adding timing signal.
-      decoder_input = self.embedding_softmax_layer(decoder_input)
-      decoder_input += timing_signal[i:i + 1]
-
-      # Bias row for step i, limited to the first i + 1 positions.
-      self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
-      decoder_outputs = self.decoder_stack(
-          decoder_input, cache.get("encoder_outputs"), self_attention_bias,
-          cache.get("encoder_decoder_attention_bias"), cache)
-      logits = self.embedding_softmax_layer.linear(decoder_outputs)
-      # Only one position was decoded, so drop the length-1 axis.
-      logits = tf.squeeze(logits, axis=[1])
-      return logits, cache
-    return symbols_to_logits_fn
-
-  def predict(self, encoder_outputs, encoder_decoder_attention_bias):
-    """Return predicted sequence.
-
-    Args:
-      encoder_outputs: encoder output tensor; its first two dimensions are
-        read as batch size and input length.
-      encoder_decoder_attention_bias: attention bias stored in the decoding
-        cache and passed to the decoder stack at every step.
-
-    Returns:
-      Dictionary with "outputs" (top decoded ids per batch element) and
-      "scores" (the score of that top sequence).
-    """
-    batch_size = tf.shape(encoder_outputs)[0]
-    input_length = tf.shape(encoder_outputs)[1]
-    max_decode_length = input_length + self.params["extra_decode_length"]
-
-    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)
-
-    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
-    initial_ids = tf.zeros([batch_size], dtype=tf.int32)
-
-    # Create cache storing decoder attention values for each layer.
-    # The keys and values start with a length-0 time axis.
-    cache = {
-        "layer_%d" % layer: {
-            "k": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
-            "v": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
-        } for layer in range(self.params["num_hidden_layers"])}
-
-    # Add encoder output and attention bias to the cache.
-    cache["encoder_outputs"] = encoder_outputs
-    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias
-
-    # Use beam search to find the top beam_size sequences and scores.
-    decoded_ids, scores = beam_search.sequence_beam_search(
-        symbols_to_logits_fn=symbols_to_logits_fn,
-        initial_ids=initial_ids,
-        initial_cache=cache,
-        vocab_size=self.params["vocab_size"],
-        beam_size=self.params["beam_size"],
-        alpha=self.params["alpha"],
-        max_decode_length=max_decode_length,
-        eos_id=EOS_ID)
-
-    # Get the top sequence for each batch element
-    # NOTE(review): the 1: slice skips the first position, presumably the
-    # initial id fed to beam search -- confirm against sequence_beam_search.
-    top_decoded_ids = decoded_ids[:, 0, 1:]
-    top_scores = scores[:, 0]
-
-    return {"outputs": top_decoded_ids, "scores": top_scores}
-
-
-class LayerNormalization(tf.layers.Layer):
-  """Normalizes the last axis of the input, then applies a learned scale and bias."""
-
-  def __init__(self, hidden_size):
-    """Records the size of the axis to normalize; variables are made in build()."""
-    super(LayerNormalization, self).__init__()
-    self.hidden_size = hidden_size
-
-  def build(self, _):
-    """Creates the scale (initialized to ones) and bias (initialized to zeros)."""
-    self.scale = tf.get_variable(
-        "layer_norm_scale",
-        [self.hidden_size],
-        initializer=tf.ones_initializer())
-    self.bias = tf.get_variable(
-        "layer_norm_bias",
-        [self.hidden_size],
-        initializer=tf.zeros_initializer())
-    self.built = True
-
-  def call(self, x, epsilon=1e-6):
-    """Returns x normalized over its last axis, scaled and shifted.
-
-    Args:
-      x: tensor whose last axis is normalized.
-      epsilon: value added to the variance before taking the reciprocal
-        square root.
-    """
-    mu = tf.reduce_mean(x, axis=[-1], keepdims=True)
-    squared_deviation = tf.square(x - mu)
-    sigma_sq = tf.reduce_mean(squared_deviation, axis=[-1], keepdims=True)
-    centered = x - mu
-    normalized = centered * tf.rsqrt(sigma_sq + epsilon)
-    return normalized * self.scale + self.bias
-
-
-class PrePostProcessingWrapper(object):
-  """Wraps a layer with layer norm before it and dropout + residual after it."""
-
-  def __init__(self, layer, params, train):
-    """Stores the wrapped layer and creates its normalization layer.
-
-    Args:
-      layer: callable applied to the normalized input.
-      params: hyperparameters; reads "layer_postprocess_dropout" and
-        "hidden_size".
-      train: if truthy, dropout is applied to the layer output.
-    """
-    self.layer = layer
-    self.postprocess_dropout = params["layer_postprocess_dropout"]
-    self.train = train
-
-    # Normalization applied to the input before the wrapped layer runs.
-    self.layer_norm = LayerNormalization(params["hidden_size"])
-
-  def __call__(self, x, *args, **kwargs):
-    """Returns x plus the (optionally dropped-out) output of the wrapped layer.
-
-    Extra positional and keyword arguments are forwarded to the wrapped layer.
-    """
-    # Preprocessing: normalize the input.
-    normalized = self.layer_norm(x)
-
-    # Run the wrapped layer on the normalized input.
-    sublayer_output = self.layer(normalized, *args, **kwargs)
-
-    # Postprocessing: dropout during training, then the residual connection.
-    if self.train:
-      sublayer_output = tf.nn.dropout(
-          sublayer_output, 1 - self.postprocess_dropout)
-    return x + sublayer_output
-
-
-class EncoderStack(tf.layers.Layer):
-  """Transformer encoder stack.
-
-  Built from params["num_hidden_layers"] layers with the same structure, each
-  holding two wrapped sublayers:
-    1. Self-attention layer
-    2. Feedforward network (which is 2 fully-connected layers)
-  A final layer normalization is applied to the output of the last layer.
-  """
-
-  def __init__(self, params, train):
-    """Creates the wrapped sublayers of every layer and the output normalization.
-
-    Args:
-      params: hyperparameters; reads "num_hidden_layers", "hidden_size",
-        "num_heads", "attention_dropout", "filter_size", "relu_dropout" and
-        "allow_ffn_pad".
-      train: training-mode flag forwarded to every sublayer.
-    """
-    super(EncoderStack, self).__init__()
-    self.layers = []
-    for _ in range(params["num_hidden_layers"]):
-      # Build the raw sublayers first, then wrap each one.
-      attention = attention_layer.SelfAttention(
-          params["hidden_size"],
-          params["num_heads"],
-          params["attention_dropout"],
-          train)
-      ffn = ffn_layer.FeedFowardNetwork(
-          params["hidden_size"],
-          params["filter_size"],
-          params["relu_dropout"],
-          train,
-          params["allow_ffn_pad"])
-
-      self.layers.append([
-          PrePostProcessingWrapper(attention, params, train),
-          PrePostProcessingWrapper(ffn, params, train)])
-
-    # Normalization applied to the output of the last layer.
-    self.output_normalization = LayerNormalization(params["hidden_size"])
-
-  def call(self, encoder_inputs, attention_bias, inputs_padding):
-    """Return the output of the encoder layer stacks.
-
-    Args:
-      encoder_inputs: tensor with shape [batch_size, input_length, hidden_size]
-      attention_bias: bias for the encoder self-attention layer.
-        [batch_size, 1, 1, input_length]
-      inputs_padding: padding information passed to every feedforward
-        sublayer as its padding argument.
-
-    Returns:
-      Output of encoder layer stack.
-      float32 tensor with shape [batch_size, input_length, hidden_size]
-    """
-    outputs = encoder_inputs
-    for index, (attention, ffn) in enumerate(self.layers):
-      # Each layer gets its own variable scope, with one child scope per
-      # sublayer.
-      with tf.variable_scope("layer_%d" % index):
-        with tf.variable_scope("self_attention"):
-          outputs = attention(outputs, attention_bias)
-        with tf.variable_scope("ffn"):
-          outputs = ffn(outputs, inputs_padding)
-
-    return self.output_normalization(outputs)
-
-
-class DecoderStack(tf.layers.Layer):
-  """Transformer decoder stack.
-
-  Built from params["num_hidden_layers"] layers with the same structure, each
-  holding three wrapped sublayers:
-    1. Self-attention layer
-    2. Multi-headed attention layer combining encoder outputs with results from
-       the previous self-attention layer.
-    3. Feedforward network (2 fully-connected layers)
-  A final layer normalization is applied to the output of the last layer.
-  """
-
-  def __init__(self, params, train):
-    """Creates the wrapped sublayers of every layer and the output normalization.
-
-    Args:
-      params: hyperparameters; reads "num_hidden_layers", "hidden_size",
-        "num_heads", "attention_dropout", "filter_size", "relu_dropout" and
-        "allow_ffn_pad".
-      train: training-mode flag forwarded to every sublayer.
-    """
-    super(DecoderStack, self).__init__()
-    self.layers = []
-    for _ in range(params["num_hidden_layers"]):
-      self_attention = attention_layer.SelfAttention(
-          params["hidden_size"],
-          params["num_heads"],
-          params["attention_dropout"],
-          train)
-      encdec_attention = attention_layer.Attention(
-          params["hidden_size"],
-          params["num_heads"],
-          params["attention_dropout"],
-          train)
-      ffn = ffn_layer.FeedFowardNetwork(
-          params["hidden_size"],
-          params["filter_size"],
-          params["relu_dropout"],
-          train,
-          params["allow_ffn_pad"])
-
-      self.layers.append([
-          PrePostProcessingWrapper(self_attention, params, train),
-          PrePostProcessingWrapper(encdec_attention, params, train),
-          PrePostProcessingWrapper(ffn, params, train)])
-
-    self.output_normalization = LayerNormalization(params["hidden_size"])
-
-  def call(self, decoder_inputs, encoder_outputs, decoder_self_attention_bias,
-           attention_bias, cache=None):
-    """Return the output of the decoder layer stacks.
-
-    Args:
-      decoder_inputs: tensor with shape [batch_size, target_length, hidden_size]
-      encoder_outputs: tensor with shape [batch_size, input_length, hidden_size]
-      decoder_self_attention_bias: bias for decoder self-attention layer.
-        [1, 1, target_len, target_length]
-      attention_bias: bias for encoder-decoder attention layer.
-        [batch_size, 1, 1, input_length]
-      cache: (Used for fast decoding) A nested dictionary storing previous
-        decoder self-attention values. The items are:
-          {layer_n: {"k": tensor with shape [batch_size, i, key_channels],
-                     "v": tensor with shape [batch_size, i, value_channels]},
-           ...}
-
-    Returns:
-      Output of decoder layer stack.
-      float32 tensor with shape [batch_size, target_length, hidden_size]
-    """
-    outputs = decoder_inputs
-    for index, sublayers in enumerate(self.layers):
-      self_attention, encdec_attention, ffn = sublayers
-
-      # The scope name doubles as the key of this layer's entry in the cache.
-      scope_name = "layer_%d" % index
-      if cache is not None:
-        layer_cache = cache[scope_name]
-      else:
-        layer_cache = None
-
-      with tf.variable_scope(scope_name):
-        with tf.variable_scope("self_attention"):
-          outputs = self_attention(
-              outputs, decoder_self_attention_bias, cache=layer_cache)
-        with tf.variable_scope("encdec_attention"):
-          outputs = encdec_attention(outputs, encoder_outputs, attention_bias)
-        with tf.variable_scope("ffn"):
-          # No padding is passed here, so the feedforward network runs on
-          # every position.
-          outputs = ffn(outputs)
-
-    return self.output_normalization(outputs)
--- a/official/r1/transformer/transformer_main.py
+++ b/official/r1/transformer/transformer_main.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Train and evaluate the Transformer model.
-
-See README for description of setting the training schedule and evaluating the
-BLEU score.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-
-# pylint: disable=g-bad-import-order
-from six.moves import xrange  # pylint: disable=redefined-builtin
-from absl import app as absl_app
-from absl import flags
-import tensorflow.compat.v1 as tf
-# pylint: enable=g-bad-import-order
-
-from official.nlp.transformer import model_params
-from official.r1.utils import export
-from official.r1.utils import tpu as tpu_util
-from official.r1.transformer import translate
-from official.r1.transformer import transformer
-from official.r1.transformer import dataset
-from official.r1.transformer import schedule
-from official.nlp.transformer import compute_bleu
-from official.nlp.transformer.utils import metrics
-from official.nlp.transformer.utils import tokenizer
-from official.utils.flags import core as flags_core
-from official.r1.utils.logs import hooks_helper
-from official.r1.utils.logs import logger
-from official.utils.misc import distribution_utils
-from official.utils.misc import model_helpers
-
-# Maps a parameter-set name to the corresponding hyperparameters defined in
-# model_params.
-PARAMS_MAP = {
-    "tiny": model_params.TINY_PARAMS,
-    "base": model_params.BASE_PARAMS,
-    "big": model_params.BIG_PARAMS,
-}
-
-
-# NOTE(review): the uses of the three constants below are outside the visible
-# part of this file; the descriptions are inferred from their names.
-# Presumably the number of training epochs when no schedule is given.
-DEFAULT_TRAIN_EPOCHS = 10
-INF = 1000000000  # 1e9
-# Presumably the name of the directory holding BLEU results.
-BLEU_DIR = "bleu"
-
-# Dictionary containing tensors that are logged by the logging hooks. Each item
-# maps a string to the tensor name.
-TENSORS_TO_LOG = {
-    "learning_rate": "model/get_train_op/learning_rate/learning_rate",
-    "cross_entropy_loss": "model/cross_entropy"}
-
-
-def model_fn(features, labels, mode, params):
-  """Defines how to train, evaluate and predict from the transformer model."""
-  with tf.variable_scope("model"):
-    inputs, targets = features, labels
-
-    # Create model and get output logits.
-    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)
-
-    logits = model(inputs, targets)
-
-    # When in prediction mode, the labels/targets is None. The model output
-    # is the prediction
-    if mode == tf.estimator.ModeKeys.PREDICT:
-      if params["use_tpu"]:
-        raise NotImplementedError("Prediction is not yet supported on TPUs.")
-      return tf.estimator.EstimatorSpec(
-          tf.estimator.ModeKeys.PREDICT,
-          predictions=logits,
-          export_outputs={
-              "translate": tf.estimator.export.PredictOutput(logits)
-          })
-
-    # Explicitly set the shape of the logits for XLA (TPU). This is needed
-    # because the logits are passed back to the host VM CPU for metric
-    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
-    # it is known from Transformer that the first two dimensions of logits
-    # are the dimensions of targets. Note that the ambiguous shape of logits is
-    # not a problem when computing xentropy, because padded_cross_entropy_loss
-    # resolves the shape on the TPU.
-    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])
-
-    # Calculate model loss.
-    # xentropy contains the cross entropy loss of every nonpadding token in the
-    # targets.
-    xentropy, weights = metrics.padded_cross_entropy_loss(
-        logits, targets, params["label_smoothing"], params["vocab_size"])
-    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
-
-    # Save loss as named tensor that will be logged with the logging hook.
-    tf.identity(loss, "cross_entropy")
-
-    if mode == tf.estimator.ModeKeys.EVAL:
-      if params["use_tpu"]:
-        # host call functions should only have tensors as arguments.
-        # This lambda pre-populates params so that metric_fn is
-        # TPUEstimator compliant.
-        metric_fn = lambda logits, labels: (
-            metrics.get_eval_metrics(logits, labels, params=params))
-        eval_metrics = (metric_fn, [logits, labels])
-        return tf.estimator.tpu.TPUEstimatorSpec(
-            mode=mode,
-            loss=loss,
-            predictions={"predictions": logits},
-            eval_metrics=eval_metrics)
-      return tf.estimator.EstimatorSpec(
-          mode=mode, loss=loss, predictions={"predictions": logits},
-          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
-    else:
-      train_op, metric_dict = get_train_op_and_metrics(loss, params)
-
-      # Epochs can be quite long. This gives some intermediate information
-      # in TensorBoard.
-      metric_dict["minibatch_loss"] = loss
-      if params["use_tpu"]:
-        return tf.estimator.tpu.TPUEstimatorSpec(
-            mode=mode,
-            loss=loss,
-            train_op=train_op,
-            host_call=tpu_util.construct_scalar_host_call(
-                metric_dict=metric_dict,
-                model_dir=params["model_dir"],
-                prefix="training/"))
-      record_scalars(metric_dict)
-      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
-
-
-def record_scalars(metric_dict):
-  for key, value in metric_dict.items():
-    tf.summary.scalar(name=key, tensor=value)
-
-
-def get_learning_rate(learning_rate, hidden_size, learning_rate_warmup_steps):
-  """Calculate learning rate with linear warmup and rsqrt decay."""
-  with tf.name_scope("learning_rate"):
-    warmup_steps = tf.to_float(learning_rate_warmup_steps)
-    step = tf.to_float(tf.train.get_or_create_global_step())
-
-    learning_rate *= (hidden_size ** -0.5)
-    # Apply linear warmup
-    learning_rate *= tf.minimum(1.0, step / warmup_steps)
-    # Apply rsqrt decay
-    learning_rate *= tf.rsqrt(tf.maximum(step, warmup_steps))
-
-    # Create a named tensor that will be logged using the logging hook.
-    # The full name includes variable and names scope. In this case, the name
-    # is model/get_train_op/learning_rate/learning_rate
-    tf.identity(learning_rate, "learning_rate")
-
-    return learning_rate
-
-
-def get_train_op_and_metrics(loss, params):
-  """Generate training op and metrics to save in TensorBoard."""
-  with tf.variable_scope("get_train_op"):
-    learning_rate = get_learning_rate(
-        learning_rate=params["learning_rate"],
-        hidden_size=params["hidden_size"],
-        learning_rate_warmup_steps=params["learning_rate_warmup_steps"])
-
-    # Create optimizer. Use LazyAdamOptimizer from TF contrib, which is faster
-    # than the TF core Adam optimizer.
-    from tensorflow.contrib import opt as contrib_opt  # pylint: disable=g-import-not-at-top
-    optimizer = contrib_opt.LazyAdamOptimizer(
-        learning_rate,
-        beta1=params["optimizer_adam_beta1"],
-        beta2=params["optimizer_adam_beta2"],
-        epsilon=params["optimizer_adam_epsilon"])
-
-    if params["use_tpu"] and params["tpu"] != tpu_util.LOCAL:
-      optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
-
-    # Uses automatic mixed precision FP16 training if on GPU.
-    if params["dtype"] == "fp16":
-      optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
-          optimizer)
-
-    # Calculate and apply gradients using LazyAdamOptimizer.
-    global_step = tf.train.get_global_step()
-    tvars = tf.trainable_variables()
-    gradients = optimizer.compute_gradients(
-        loss, tvars, colocate_gradients_with_ops=True)
-    minimize_op = optimizer.apply_gradients(
-        gradients, global_step=global_step, name="train")
-    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-    train_op = tf.group(minimize_op, update_ops)
-
-    train_metrics = {"learning_rate": learning_rate}
-
-    if not params["use_tpu"]:
-      # gradient norm is not included as a summary when running on TPU, as
-      # it can cause instability between the TPU and the host controller.
-      gradient_norm = tf.global_norm(list(zip(*gradients))[0])
-      train_metrics["global_norm/gradient_norm"] = gradient_norm
-
-    return train_op, train_metrics
-
-
-def translate_and_compute_bleu(estimator, subtokenizer, bleu_source, bleu_ref):
-  """Translate file and report the cased and uncased bleu scores."""
-  # Create temporary file to store translation.
-  tmp = tempfile.NamedTemporaryFile(delete=False)
-  tmp_filename = tmp.name
-
-  translate.translate_file(
-      estimator, subtokenizer, bleu_source, output_file=tmp_filename,
-      print_all_translations=False)
-
-  # Compute uncased and cased bleu scores.
-  uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
-  cased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, True)
-  os.remove(tmp_filename)
-  return uncased_score, cased_score
-
-
-def get_global_step(estimator):
-  """Return estimator's last checkpoint."""
-  return int(estimator.latest_checkpoint().split("-")[-1])
-
-
-def evaluate_and_log_bleu(estimator, bleu_source, bleu_ref, vocab_file):
-  """Calculate and record the BLEU score."""
-  subtokenizer = tokenizer.Subtokenizer(vocab_file)
-
-  uncased_score, cased_score = translate_and_compute_bleu(
-      estimator, subtokenizer, bleu_source, bleu_ref)
-
-  tf.logging.info("Bleu score (uncased): %f", uncased_score)
-  tf.logging.info("Bleu score (cased): %f", cased_score)
-  return uncased_score, cased_score
-
-
-def _validate_file(filepath):
-  """Make sure that file exists."""
-  if not tf.io.gfile.exists(filepath):
-    raise tf.errors.NotFoundError(None, None, "File %s not found." % filepath)
-
-
-def run_loop(
-    estimator, schedule_manager, train_hooks=None, benchmark_logger=None,
-    bleu_source=None, bleu_ref=None, bleu_threshold=None, vocab_file=None):
-  """Train and evaluate model, and optionally compute model's BLEU score.
-
-  **Step vs. Epoch vs. Iteration**
-
-  Steps and epochs are canonical terms used in TensorFlow and general machine
-  learning. They are used to describe running a single process (train/eval):
-    - Step refers to running the process through a single or batch of examples.
-    - Epoch refers to running the process through an entire dataset.
-
-  E.g. training a dataset with 100 examples. The dataset is
-  divided into 20 batches with 5 examples per batch. A single training step
-  trains the model on one batch. After 20 training steps, the model will have
-  trained on every batch in the dataset, or, in other words, one epoch.
-
-  Meanwhile, iteration is used in this implementation to describe running
-  multiple processes (training and eval).
-    - A single iteration:
-      1. trains the model for a specific number of steps or epochs.
-      2. evaluates the model.
-      3. (if source and ref files are provided) compute BLEU score.
-
-  This function runs through multiple train+eval+bleu iterations.
-
-  Args:
-    estimator: tf.Estimator containing model to train.
-    schedule_manager: A schedule.Manager object to guide the run loop.
-    train_hooks: List of hooks to pass to the estimator during training.
-    benchmark_logger: a BenchmarkLogger object that logs evaluation data
-    bleu_source: File containing text to be translated for BLEU calculation.
-    bleu_ref: File containing reference translations for BLEU calculation.
-    bleu_threshold: minimum BLEU score before training is stopped.
-    vocab_file: Path to vocab file that will be used to subtokenize bleu_source.
-
-  Returns:
-    Dict of results of the run.  Contains the keys `eval_results`,
-    `train_hooks`, `bleu_cased`, and `bleu_uncased`. `train_hooks` is a list the
-    instances of hooks used during training.
-
-  Raises:
-    ValueError: if both or none of single_iteration_train_steps and
-      single_iteration_train_epochs were defined.
-    NotFoundError: if the vocab file or bleu files don't exist.
-  """
-  if bleu_source:
-    _validate_file(bleu_source)
-  if bleu_ref:
-    _validate_file(bleu_ref)
-  if vocab_file:
-    _validate_file(vocab_file)
-
-  evaluate_bleu = bleu_source is not None and bleu_ref is not None
-  if evaluate_bleu and schedule_manager.use_tpu:
-    raise ValueError("BLEU score can not be computed when training with a TPU, "
-                     "as it requires estimator.predict which is not yet "
-                     "supported.")
-
-  # Print details of training schedule.
-  tf.logging.info("Training schedule:")
-  tf.logging.info(
-      "\t1. Train for {}".format(schedule_manager.train_increment_str))
-  tf.logging.info("\t2. Evaluate model.")
-  if evaluate_bleu:
-    tf.logging.info("\t3. Compute BLEU score.")
-    if bleu_threshold is not None:
-      tf.logging.info("Repeat above steps until the BLEU score reaches %f" %
-                      bleu_threshold)
-  if not evaluate_bleu or bleu_threshold is None:
-    tf.logging.info("Repeat above steps %d times." %
-                    schedule_manager.train_eval_iterations)
-
-  if evaluate_bleu:
-    # Create summary writer to log bleu score (values can be displayed in
-    # Tensorboard).
-    bleu_writer = tf.summary.FileWriter(
-        os.path.join(estimator.model_dir, BLEU_DIR))
-    if bleu_threshold is not None:
-      # Change loop stopping condition if bleu_threshold is defined.
-      schedule_manager.train_eval_iterations = INF
-
-  # Loop training/evaluation/bleu cycles
-  stats = {}
-  for i in xrange(schedule_manager.train_eval_iterations):
-    tf.logging.info("Starting iteration %d" % (i + 1))
-
-    # Train the model for single_iteration_train_steps or until the input fn
-    # runs out of examples (if single_iteration_train_steps is None).
-    estimator.train(
-        dataset.train_input_fn,
-        steps=schedule_manager.single_iteration_train_steps,
-        hooks=train_hooks)
-
-    eval_results = None
-    eval_results = estimator.evaluate(
-        input_fn=dataset.eval_input_fn,
-        steps=schedule_manager.single_iteration_eval_steps)
-
-    tf.logging.info("Evaluation results (iter %d/%d):" %
-                    (i + 1, schedule_manager.train_eval_iterations))
-    tf.logging.info(eval_results)
-    benchmark_logger.log_evaluation_result(eval_results)
-
-    # The results from estimator.evaluate() are measured on an approximate
-    # translation, which utilize the target golden values provided. The actual
-    # bleu score must be computed using the estimator.predict() path, which
-    # outputs translations that are not based on golden values. The translations
-    # are compared to reference file to get the actual bleu score.
-    if evaluate_bleu:
-      uncased_score, cased_score = evaluate_and_log_bleu(
-          estimator, bleu_source, bleu_ref, vocab_file)
-
-      stats["bleu_uncased"] = uncased_score
-      stats["bleu_cased"] = cased_score
-
-      # Write actual bleu scores using summary writer and benchmark logger
-      global_step = get_global_step(estimator)
-      summary = tf.Summary(value=[
-          tf.Summary.Value(tag="bleu/uncased", simple_value=uncased_score),
-          tf.Summary.Value(tag="bleu/cased", simple_value=cased_score),
-      ])
-      bleu_writer.add_summary(summary, global_step)
-      bleu_writer.flush()
-      benchmark_logger.log_metric(
-          "bleu_uncased", uncased_score, global_step=global_step)
-      benchmark_logger.log_metric(
-          "bleu_cased", cased_score, global_step=global_step)
-
-      # Stop training if bleu stopping threshold is met.
-      if model_helpers.past_stop_threshold(bleu_threshold, uncased_score):
-        bleu_writer.close()
-        break
-
-  stats["eval_results"] = eval_results
-  stats["train_hooks"] = train_hooks
-
-  return stats
-
-
-def define_transformer_flags():
-  """Add flags and flag validators for running transformer_main."""
-  # Add common flags (data_dir, model_dir, train_epochs, etc.).
-  flags.DEFINE_integer(
-      name="max_length", short_name="ml", default=None,
-      help=flags_core.help_wrap("Max length."))
-
-  flags_core.define_base(clean=True, train_epochs=True,
-                         epochs_between_evals=True, stop_threshold=True,
-                         num_gpu=True, hooks=True, export_dir=True,
-                         distribution_strategy=True)
-  flags_core.define_performance(
-      num_parallel_calls=True,
-      inter_op=False,
-      intra_op=False,
-      synthetic_data=True,
-      max_train_steps=False,
-      dtype=True,
-      all_reduce_alg=True
-  )
-  flags_core.define_benchmark()
-  flags_core.define_device(tpu=True)
-
-  # Set flags from the flags_core module as "key flags" so they're listed when
-  # the '-h' flag is used. Without this line, the flags defined above are
-  # only shown in the full `--helpful` help text.
-  flags.adopt_module_key_flags(flags_core)
-
-  # Add transformer-specific flags
-  flags.DEFINE_enum(
-      name="param_set", short_name="mp", default="big",
-      enum_values=PARAMS_MAP.keys(),
-      help=flags_core.help_wrap(
-          "Parameter set to use when creating and training the model. The "
-          "parameters define the input shape (batch size and max length), "
-          "model configuration (size of embedding, # of hidden layers, etc.), "
-          "and various other settings. The big parameter set increases the "
-          "default batch size, embedding/hidden size, and filter size. For a "
-          "complete list of parameters, please see model/model_params.py."))
-
-  flags.DEFINE_bool(
-      name="static_batch", default=False,
-      help=flags_core.help_wrap(
-          "Whether the batches in the dataset should have static shapes. In "
-          "general, this setting should be False. Dynamic shapes allow the "
-          "inputs to be grouped so that the number of padding tokens is "
-          "minimized, and helps model training. In cases where the input shape "
-          "must be static (e.g. running on TPU), this setting will be ignored "
-          "and static batching will always be used."))
-
-  # Flags for training with steps (may be used for debugging)
-  flags.DEFINE_integer(
-      name="train_steps", short_name="ts", default=None,
-      help=flags_core.help_wrap("The number of steps used to train."))
-  flags.DEFINE_integer(
-      name="steps_between_evals", short_name="sbe", default=1000,
-      help=flags_core.help_wrap(
-          "The Number of training steps to run between evaluations. This is "
-          "used if --train_steps is defined."))
-
-  # BLEU score computation
-  flags.DEFINE_string(
-      name="bleu_source", short_name="bls", default=None,
-      help=flags_core.help_wrap(
-          "Path to source file containing text translate when calculating the "
-          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
-          "Use the flag --stop_threshold to stop the script based on the "
-          "uncased BLEU score."))
-  flags.DEFINE_string(
-      name="bleu_ref", short_name="blr", default=None,
-      help=flags_core.help_wrap(
-          "Path to source file containing text translate when calculating the "
-          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
-          "Use the flag --stop_threshold to stop the script based on the "
-          "uncased BLEU score."))
-  flags.DEFINE_string(
-      name="vocab_file", short_name="vf", default=None,
-      help=flags_core.help_wrap(
-          "Path to subtoken vocabulary file. If data_download.py was used to "
-          "download and encode the training data, look in the data_dir to find "
-          "the vocab file."))
-
-  flags_core.set_defaults(data_dir="/tmp/translate_ende",
-                          model_dir="/tmp/transformer_model",
-                          batch_size=None,
-                          train_epochs=None)
-
-  @flags.multi_flags_validator(
-      ["train_epochs", "train_steps"],
-      message="Both --train_steps and --train_epochs were set. Only one may be "
-              "defined.")
-  def _check_train_limits(flag_dict):
-    return flag_dict["train_epochs"] is None or flag_dict["train_steps"] is None
-
-  @flags.multi_flags_validator(
-      ["bleu_source", "bleu_ref"],
-      message="Both or neither --bleu_source and --bleu_ref must be defined.")
-  def _check_bleu_files(flags_dict):
-    return (flags_dict["bleu_source"] is None) == (
-        flags_dict["bleu_ref"] is None)
-
-  @flags.multi_flags_validator(
-      ["bleu_source", "bleu_ref", "vocab_file"],
-      message="--vocab_file must be defined if --bleu_source and --bleu_ref "
-              "are defined.")
-  def _check_bleu_vocab_file(flags_dict):
-    if flags_dict["bleu_source"] and flags_dict["bleu_ref"]:
-      return flags_dict["vocab_file"] is not None
-    return True
-
-  @flags.multi_flags_validator(
-      ["export_dir", "vocab_file"],
-      message="--vocab_file must be defined if --export_dir is set.")
-  def _check_export_vocab_file(flags_dict):
-    if flags_dict["export_dir"]:
-      return flags_dict["vocab_file"] is not None
-    return True
-
-  flags_core.require_cloud_storage(["data_dir", "model_dir", "export_dir"])
-
-
-def construct_estimator(flags_obj, params, schedule_manager):
-  """Construct an estimator from either Estimator or TPUEstimator.
-
-  Args:
-    flags_obj: The FLAGS object parsed from command line.
-    params: A dict of run specific parameters.
-    schedule_manager: A schedule.Manager object containing the run schedule.
-
-  Returns:
-    An estimator object to be used for training and eval.
-  """
-  if not params["use_tpu"]:
-    distribution_strategy = distribution_utils.get_distribution_strategy(
-        distribution_strategy=flags_obj.distribution_strategy,
-        num_gpus=flags_core.get_num_gpus(flags_obj),
-        all_reduce_alg=flags_obj.all_reduce_alg)
-    return tf.estimator.Estimator(
-        model_fn=model_fn, model_dir=flags_obj.model_dir, params=params,
-        config=tf.estimator.RunConfig(train_distribute=distribution_strategy))
-
-  tpu_cluster_resolver = tf.compat.v1.cluster_resolver.TPUClusterResolver(
-      tpu=flags_obj.tpu,
-      zone=flags_obj.tpu_zone,
-      project=flags_obj.tpu_gcp_project
-  )
-
-  tpu_config = tf.estimator.tpu.TPUConfig(
-      iterations_per_loop=schedule_manager.single_iteration_train_steps,
-      num_shards=flags_obj.num_tpu_shards)
-
-  run_config = tf.estimator.tpu.RunConfig(
-      cluster=tpu_cluster_resolver,
-      model_dir=flags_obj.model_dir,
-      session_config=tf.ConfigProto(
-          allow_soft_placement=True, log_device_placement=True),
-      tpu_config=tpu_config)
-
-  return tf.estimator.tpu.TPUEstimator(
-      model_fn=model_fn,
-      use_tpu=params["use_tpu"] and flags_obj.tpu != tpu_util.LOCAL,
-      train_batch_size=schedule_manager.batch_size,
-      eval_batch_size=schedule_manager.batch_size,
-      params={
-          # TPUEstimator needs to populate batch_size itself due to sharding.
-          key: value for key, value in params.items() if key != "batch_size"
-      },
-      config=run_config)
-
-def per_replica_batch_size(batch_size, num_gpus):
-  """For multi-gpu, batch-size must be a multiple of the number of GPUs.
-
-
-  Note that distribution strategy handles this automatically when used with
-  Keras. For using with Estimator, we need to get per GPU batch.
-
-  Args:
-    batch_size: Global batch size to be divided among devices. This should be
-      equal to num_gpus times the single-GPU batch_size for multi-gpu training.
-    num_gpus: How many GPUs are used with DistributionStrategies.
-
-  Returns:
-    Batch size per device.
-
-  Raises:
-    ValueError: if batch_size is not divisible by number of devices
-  """
-  if num_gpus <= 1:
-    return batch_size
-
-  remainder = batch_size % num_gpus
-  if remainder:
-    err = ('When running with multiple GPUs, batch size '
-           'must be a multiple of the number of available GPUs. Found {} '
-           'GPUs with a batch size of {}; try --batch_size={} instead.'
-          ).format(num_gpus, batch_size, batch_size - remainder)
-    raise ValueError(err)
-  return int(batch_size / num_gpus)
-
-
-def run_transformer(flags_obj):
-  """Create tf.Estimator to train and evaluate transformer model.
-
-  Args:
-    flags_obj: Object containing parsed flag values.
-
-  Returns:
-    Dict of results of the run.  Contains the keys `eval_results`,
-    `train_hooks`, `bleu_cased`, and `bleu_uncased`. `train_hooks` is a list the
-    instances of hooks used during training.
-  """
-  num_gpus = flags_core.get_num_gpus(flags_obj)
-
-  # Add flag-defined parameters to params object
-  params = PARAMS_MAP[flags_obj.param_set]
-  if num_gpus > 1:
-    if flags_obj.param_set == "big":
-      params = model_params.BIG_MULTI_GPU_PARAMS
-    elif flags_obj.param_set == "base":
-      params = model_params.BASE_MULTI_GPU_PARAMS
-
-  params["data_dir"] = flags_obj.data_dir
-  params["model_dir"] = flags_obj.model_dir
-  params["num_parallel_calls"] = flags_obj.num_parallel_calls
-
-  params["tpu"] = flags_obj.tpu
-  params["use_tpu"] = bool(flags_obj.tpu)  # was a tpu specified.
-  params["static_batch"] = flags_obj.static_batch or params["use_tpu"]
-  params["allow_ffn_pad"] = not params["use_tpu"]
-
-  params["max_length"] = flags_obj.max_length or params["max_length"]
-
-  params["use_synthetic_data"] = flags_obj.use_synthetic_data
-
-  # Set batch size parameter, which depends on the availability of
-  # TPU and GPU, and distribution settings.
-  params["batch_size"] = (flags_obj.batch_size or (
-      params["default_batch_size_tpu"] if params["use_tpu"]
-      else params["default_batch_size"]))
-
-  total_batch_size = params["batch_size"]
-  if not params["use_tpu"]:
-    params["batch_size"] = per_replica_batch_size(params["batch_size"],
-                                                  num_gpus)
-
-  schedule_manager = schedule.Manager(
-      train_steps=flags_obj.train_steps,
-      steps_between_evals=flags_obj.steps_between_evals,
-      train_epochs=flags_obj.train_epochs,
-      epochs_between_evals=flags_obj.epochs_between_evals,
-      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
-      batch_size=params["batch_size"],
-      max_length=params["max_length"],
-      use_tpu=params["use_tpu"],
-      num_tpu_shards=flags_obj.num_tpu_shards
-  )
-
-  params["repeat_dataset"] = schedule_manager.repeat_dataset
-
-  model_helpers.apply_clean(flags.FLAGS)
-
-  # Create hooks that log information about the training and metric values
-  train_hooks = hooks_helper.get_train_hooks(
-      flags_obj.hooks,
-      model_dir=flags_obj.model_dir,
-      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
-      batch_size=total_batch_size,  # for ExamplesPerSecondHook
-      use_tpu=params["use_tpu"]  # Not all hooks can run with TPUs
-  )
-  benchmark_logger = logger.get_benchmark_logger()
-  benchmark_logger.log_run_info(
-      model_name="transformer",
-      dataset_name="wmt_translate_ende",
-      run_params=params,
-      test_id=flags_obj.benchmark_test_id)
-
-  # Train and evaluate transformer model
-  estimator = construct_estimator(flags_obj, params, schedule_manager)
-  stats = run_loop(
-      estimator=estimator,
-      # Training arguments
-      schedule_manager=schedule_manager,
-      train_hooks=train_hooks,
-      benchmark_logger=benchmark_logger,
-      # BLEU calculation arguments
-      bleu_source=flags_obj.bleu_source,
-      bleu_ref=flags_obj.bleu_ref,
-      bleu_threshold=flags_obj.stop_threshold,
-      vocab_file=flags_obj.vocab_file)
-
-  if flags_obj.export_dir and not params["use_tpu"]:
-    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
-        shape=[None], dtype=tf.int64, batch_size=None)
-    # Export saved model, and save the vocab file as an extra asset. The vocab
-    # file is saved to allow consistent input encoding and output decoding.
-    # (See the "Export trained model" section in the README for an example of
-    # how to use the vocab file.)
-    # Since the model itself does not use the vocab file, this file is saved as
-    # an extra asset rather than a core asset.
-    estimator.export_savedmodel(
-        flags_obj.export_dir, serving_input_fn,
-        assets_extra={"vocab.txt": flags_obj.vocab_file},
-        strip_default_attrs=True)
-  return stats
-
-
-def main(_):
-  with logger.benchmark_context(flags.FLAGS):
-    run_transformer(flags.FLAGS)
-
-
-if __name__ == "__main__":
-  tf.logging.set_verbosity(tf.logging.INFO)
-  define_transformer_flags()
-  absl_app.run(main)
--- a/official/r1/transformer/translate.py
+++ b/official/r1/transformer/translate.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Translate text or files using trained transformer model."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-# pylint: disable=g-bad-import-order
-from absl import app as absl_app
-from absl import flags
-import tensorflow.compat.v1 as tf
-# pylint: enable=g-bad-import-order
-
-from official.nlp.transformer.utils import tokenizer
-from official.utils.flags import core as flags_core
-
-_DECODE_BATCH_SIZE = 32
-_EXTRA_DECODE_LENGTH = 100
-_BEAM_SIZE = 4
-_ALPHA = 0.6
-
-
-def _get_sorted_inputs(filename):
-  """Read and sort lines from the file sorted by decreasing length.
-
-  Args:
-    filename: String name of file to read inputs from.
-  Returns:
-    Sorted list of inputs, and dictionary mapping original index->sorted index
-    of each element.
-  """
-  with tf.io.gfile.GFile(filename) as f:
-    records = f.read().split("\n")
-    inputs = [record.strip() for record in records]
-    if not inputs[-1]:
-      inputs.pop()
-
-  input_lens = [(i, len(line.split())) for i, line in enumerate(inputs)]
-  sorted_input_lens = sorted(input_lens, key=lambda x: x[1], reverse=True)
-
-  sorted_inputs = [None] * len(sorted_input_lens)
-  sorted_keys = [0] * len(sorted_input_lens)
-  for i, (index, _) in enumerate(sorted_input_lens):
-    sorted_inputs[i] = inputs[index]
-    sorted_keys[index] = i
-
-  return sorted_inputs, sorted_keys
-
-
-def _encode_and_add_eos(line, subtokenizer):
-  """Encode line with subtokenizer, and add EOS id to the end."""
-  return subtokenizer.encode(line) + [tokenizer.EOS_ID]
-
-
-def _trim_and_decode(ids, subtokenizer):
-  """Trim EOS and PAD tokens from ids, and decode to return a string."""
-  try:
-    index = list(ids).index(tokenizer.EOS_ID)
-    return subtokenizer.decode(ids[:index])
-  except ValueError:  # No EOS found in sequence
-    return subtokenizer.decode(ids)
-
-
-def translate_file(
-    estimator, subtokenizer, input_file, output_file=None,
-    print_all_translations=True):
-  """Translate lines in file, and save to output file if specified.
-
-  Args:
-    estimator: tf.Estimator used to generate the translations.
-    subtokenizer: Subtokenizer object for encoding and decoding source and
-       translated lines.
-    input_file: file containing lines to translate
-    output_file: file that stores the generated translations.
-    print_all_translations: If true, all translations are printed to stdout.
-
-  Raises:
-    ValueError: if output file is invalid.
-  """
-  batch_size = _DECODE_BATCH_SIZE
-
-  # Read and sort inputs by length. Keep dictionary (original index-->new index
-  # in sorted list) to write translations in the original order.
-  sorted_inputs, sorted_keys = _get_sorted_inputs(input_file)
-  num_decode_batches = (len(sorted_inputs) - 1) // batch_size + 1
-
-  def input_generator():
-    """Yield encoded strings from sorted_inputs."""
-    for i, line in enumerate(sorted_inputs):
-      if i % batch_size == 0:
-        batch_num = (i // batch_size) + 1
-
-        tf.logging.info("Decoding batch %d out of %d." %
-                        (batch_num, num_decode_batches))
-      yield _encode_and_add_eos(line, subtokenizer)
-
-  def input_fn():
-    """Created batched dataset of encoded inputs."""
-    ds = tf.data.Dataset.from_generator(
-        input_generator, tf.int64, tf.TensorShape([None]))
-    ds = ds.padded_batch(batch_size, [None])
-    return ds
-
-  translations = []
-  for i, prediction in enumerate(estimator.predict(input_fn)):
-    translation = _trim_and_decode(prediction["outputs"], subtokenizer)
-    translations.append(translation)
-
-    if print_all_translations:
-      tf.logging.info("Translating:\n\tInput: %s\n\tOutput: %s" %
-                      (sorted_inputs[i], translation))
-
-  # Write translations in the order they appeared in the original file.
-  if output_file is not None:
-    if tf.io.gfile.isdir(output_file):
-      raise ValueError("File output is a directory, will not save outputs to "
-                       "file.")
-    tf.logging.info("Writing to file %s" % output_file)
-    with tf.io.gfile.GFile(output_file, "w") as f:
-      for i in sorted_keys:
-        f.write("%s\n" % translations[i])
-
-
-def translate_text(estimator, subtokenizer, txt):
-  """Translate a single string."""
-  encoded_txt = _encode_and_add_eos(txt, subtokenizer)
-
-  def input_fn():
-    ds = tf.data.Dataset.from_tensors(encoded_txt)
-    ds = ds.batch(_DECODE_BATCH_SIZE)
-    return ds
-
-  predictions = estimator.predict(input_fn)
-  translation = next(predictions)["outputs"]
-  translation = _trim_and_decode(translation, subtokenizer)
-  tf.logging.info("Translation of \"%s\": \"%s\"" % (txt, translation))
-
-
-def main(unused_argv):
-  from official.transformer import transformer_main
-
-  tf.logging.set_verbosity(tf.logging.INFO)
-
-  if FLAGS.text is None and FLAGS.file is None:
-    tf.logging.warn("Nothing to translate. Make sure to call this script using "
-                    "flags --text or --file.")
-    return
-
-  subtokenizer = tokenizer.Subtokenizer(FLAGS.vocab_file)
-
-  # Set up estimator and params
-  params = transformer_main.PARAMS_MAP[FLAGS.param_set]
-  params["beam_size"] = _BEAM_SIZE
-  params["alpha"] = _ALPHA
-  params["extra_decode_length"] = _EXTRA_DECODE_LENGTH
-  params["batch_size"] = _DECODE_BATCH_SIZE
-  estimator = tf.estimator.Estimator(
-      model_fn=transformer_main.model_fn, model_dir=FLAGS.model_dir,
-      params=params)
-
-  if FLAGS.text is not None:
-    tf.logging.info("Translating text: %s" % FLAGS.text)
-    translate_text(estimator, subtokenizer, FLAGS.text)
-
-  if FLAGS.file is not None:
-    input_file = os.path.abspath(FLAGS.file)
-    tf.logging.info("Translating file: %s" % input_file)
-    if not tf.gfile.Exists(FLAGS.file):
-      raise ValueError("File does not exist: %s" % input_file)
-
-    output_file = None
-    if FLAGS.file_out is not None:
-      output_file = os.path.abspath(FLAGS.file_out)
-      tf.logging.info("File output specified: %s" % output_file)
-
-    translate_file(estimator, subtokenizer, input_file, output_file)
-
-
-def define_translate_flags():
-  """Define flags used for translation script."""
-  # Model flags
-  flags.DEFINE_string(
-      name="model_dir", short_name="md", default="/tmp/transformer_model",
-      help=flags_core.help_wrap(
-          "Directory containing Transformer model checkpoints."))
-  flags.DEFINE_enum(
-      name="param_set", short_name="mp", default="big",
-      enum_values=["base", "big"],
-      help=flags_core.help_wrap(
-          "Parameter set to use when creating and training the model. The "
-          "parameters define the input shape (batch size and max length), "
-          "model configuration (size of embedding, # of hidden layers, etc.), "
-          "and various other settings. The big parameter set increases the "
-          "default batch size, embedding/hidden size, and filter size. For a "
-          "complete list of parameters, please see model/model_params.py."))
-  flags.DEFINE_string(
-      name="vocab_file", short_name="vf", default=None,
-      help=flags_core.help_wrap(
-          "Path to subtoken vocabulary file. If data_download.py was used to "
-          "download and encode the training data, look in the data_dir to find "
-          "the vocab file."))
-  flags.mark_flag_as_required("vocab_file")
-
-  flags.DEFINE_string(
-      name="text", default=None,
-      help=flags_core.help_wrap(
-          "Text to translate. Output will be printed to console."))
-  flags.DEFINE_string(
-      name="file", default=None,
-      help=flags_core.help_wrap(
-          "File containing text to translate. Translation will be printed to "
-          "console and, if --file_out is provided, saved to an output file."))
-  flags.DEFINE_string(
-      name="file_out", default=None,
-      help=flags_core.help_wrap(
-          "If --file flag is specified, save translation to this file."))
-
-
-if __name__ == "__main__":
-  define_translate_flags()
-  FLAGS = flags.FLAGS
-  absl_app.run(main)
--- a/official/r1/utils/__init__.py
+++ b/official/r1/utils/__init__.py
--- a/official/r1/utils/data/__init__.py
+++ b/official/r1/utils/data/__init__.py
--- a/official/r1/utils/data/file_io.py
+++ b/official/r1/utils/data/file_io.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Convenience functions for managing dataset file buffers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import atexit
-import multiprocessing
-import multiprocessing.dummy
-import os
-import tempfile
-import uuid
-
-from absl import logging
-import numpy as np
-import six
-import tensorflow as tf
-# pylint:disable=logging-format-interpolation
-
-
-class _GarbageCollector(object):
-  """Deletes temporary buffer files at exit.
-
-  Certain tasks (such as NCF Recommendation) require writing buffers to
-  temporary files. (Which may be local or distributed.) It is not generally safe
-  to delete these files during operation, but they should be cleaned up. This
-  class keeps track of temporary files created, and deletes them at exit.
-  """
-  def __init__(self):
-    self.temp_buffers = []
-
-  def register(self, filepath):
-    self.temp_buffers.append(filepath)
-
-  def purge(self):
-    try:
-      for i in self.temp_buffers:
-        if tf.io.gfile.exists(i):
-          tf.io.gfile.remove(i)
-          logging.info("Buffer file {} removed".format(i))
-    except Exception as e:
-      logging.error("Failed to cleanup buffer files: {}".format(e))
-
-
-_GARBAGE_COLLECTOR = _GarbageCollector()
-atexit.register(_GARBAGE_COLLECTOR.purge)
-
-_ROWS_PER_CORE = 50000
-
-
-def write_to_temp_buffer(dataframe, buffer_folder, columns):
-  if buffer_folder is None:
-    _, buffer_path = tempfile.mkstemp()
-  else:
-    tf.io.gfile.makedirs(buffer_folder)
-    buffer_path = os.path.join(buffer_folder, str(uuid.uuid4()))
-  _GARBAGE_COLLECTOR.register(buffer_path)
-
-  return write_to_buffer(dataframe, buffer_path, columns)
-
-
-def iter_shard_dataframe(df, rows_per_core=1000):
-  """Two way shard of a dataframe.
-
-  This function evenly shards a dataframe so that it can be mapped efficiently.
-  It yields a list of dataframes with length equal to the number of CPU cores,
-  with each dataframe having rows_per_core rows. (Except for the last batch
-  which may have fewer rows in the dataframes.) Passing vectorized inputs to
-  a pool is more effecient than iterating through a dataframe in serial and
-  passing a list of inputs to the pool.
-
-  Args:
-    df: Pandas dataframe to be sharded.
-    rows_per_core: Number of rows in each shard.
-
-  Returns:
-    A list of dataframe shards.
-  """
-  n = len(df)
-  num_cores = min([multiprocessing.cpu_count(), n])
-
-  num_blocks = int(np.ceil(n / num_cores / rows_per_core))
-  max_batch_size = num_cores * rows_per_core
-  for i in range(num_blocks):
-    min_index = i * max_batch_size
-    max_index = min([(i + 1) * max_batch_size, n])
-    df_shard = df[min_index:max_index]
-    n_shard = len(df_shard)
-    boundaries = np.linspace(0, n_shard, num_cores + 1, dtype=np.int64)
-    yield [df_shard[boundaries[j]:boundaries[j+1]] for j in range(num_cores)]
-
-
-def _shard_dict_to_examples(shard_dict):
-  """Converts a dict of arrays into a list of example bytes."""
-  n = [i for i in shard_dict.values()][0].shape[0]
-  feature_list = [{} for _ in range(n)]
-  for column, values in shard_dict.items():
-    if len(values.shape) == 1:
-      values = np.reshape(values, values.shape + (1,))
-
-    if values.dtype.kind == "i":
-      feature_map = lambda x: tf.train.Feature(
-          int64_list=tf.train.Int64List(value=x))
-    elif values.dtype.kind == "f":
-      feature_map = lambda x: tf.train.Feature(
-          float_list=tf.train.FloatList(value=x))
-    else:
-      raise ValueError("Invalid dtype")
-    for i in range(n):
-      feature_list[i][column] = feature_map(values[i])
-  examples = [
-      tf.train.Example(features=tf.train.Features(feature=example_features))
-      for example_features in feature_list
-  ]
-
-  return [e.SerializeToString() for e in examples]
-
-
-def _serialize_shards(df_shards, columns, pool, writer):
-  """Map sharded dataframes to bytes, and write them to a buffer.
-
-  Args:
-    df_shards: A list of pandas dataframes. (Should be of similar size)
-    columns: The dataframe columns to be serialized.
-    pool: A pool to serialize in parallel.
-    writer: A TFRecordWriter to write the serialized shards.
-  """
-  # Pandas does not store columns of arrays as nd arrays. stack remedies this.
-  map_inputs = [{c: np.stack(shard[c].values, axis=0) for c in columns}
-                for shard in df_shards]
-
-  # Failure within pools is very irksome. Thus, it is better to thoroughly check
-  # inputs in the main process.
-  for inp in map_inputs:
-    # Check that all fields have the same number of rows.
-    assert len(set([v.shape[0] for v in inp.values()])) == 1
-    for val in inp.values():
-      assert hasattr(val, "dtype")
-      assert hasattr(val.dtype, "kind")
-      assert val.dtype.kind in ("i", "f")
-      assert len(val.shape) in (1, 2)
-  shard_bytes = pool.map(_shard_dict_to_examples, map_inputs)
-  for s in shard_bytes:
-    for example in s:
-      writer.write(example)
-
-
-def write_to_buffer(dataframe, buffer_path, columns, expected_size=None):
-  """Write a dataframe to a binary file for a dataset to consume.
-
-  Args:
-    dataframe: The pandas dataframe to be serialized.
-    buffer_path: The path where the serialized results will be written.
-    columns: The dataframe columns to be serialized.
-    expected_size: The size in bytes of the serialized results. This is used to
-      lazily construct the buffer.
-
-  Returns:
-    The path of the buffer.
-  """
-  if (tf.io.gfile.exists(buffer_path) and
-      tf.io.gfile.stat(buffer_path).length > 0):
-    actual_size = tf.io.gfile.stat(buffer_path).length
-    if expected_size == actual_size:
-      return buffer_path
-    logging.warning(
-        "Existing buffer {} has size {}. Expected size {}. Deleting and "
-        "rebuilding buffer.".format(buffer_path, actual_size, expected_size))
-    tf.io.gfile.remove(buffer_path)
-
-  if dataframe is None:
-    raise ValueError(
-        "dataframe was None but a valid existing buffer was not found.")
-
-  tf.io.gfile.makedirs(os.path.split(buffer_path)[0])
-
-  logging.info("Constructing TFRecordDataset buffer: {}".format(buffer_path))
-
-  count = 0
-  pool = multiprocessing.dummy.Pool(multiprocessing.cpu_count())
-  try:
-    with tf.io.TFRecordWriter(buffer_path) as writer:
-      for df_shards in iter_shard_dataframe(df=dataframe,
-                                            rows_per_core=_ROWS_PER_CORE):
-        _serialize_shards(df_shards, columns, pool, writer)
-        count += sum([len(s) for s in df_shards])
-        logging.info("{}/{} examples written.".format(
-            str(count).ljust(8), len(dataframe)))
-  finally:
-    pool.terminate()
-
-  logging.info("Buffer write complete.")
-  return buffer_path
--- a/official/r1/utils/data/file_io_test.py
+++ b/official/r1/utils/data/file_io_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for binary data file utilities."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import contextlib
-import multiprocessing
-
-# pylint: disable=wrong-import-order
-import numpy as np
-import pandas as pd
-import tensorflow as tf
-# pylint: enable=wrong-import-order
-
-from official.r1.utils.data import file_io
-
-
-_RAW_ROW = "raw_row"
-_DUMMY_COL = "column_0"
-_DUMMY_VEC_COL = "column_1"
-_DUMMY_VEC_LEN = 4
-
-_ROWS_PER_CORE = 4
-_TEST_CASES = [
-    # One batch of one
-    dict(row_count=1, cpu_count=1, expected=[
-        [[0]]
-    ]),
-
-    dict(row_count=10, cpu_count=1, expected=[
-        [[0, 1, 2, 3]], [[4, 5, 6, 7]], [[8, 9]]
-    ]),
-
-    dict(row_count=21, cpu_count=1, expected=[
-        [[0, 1, 2, 3]], [[4, 5, 6, 7]], [[8, 9, 10, 11]],
-        [[12, 13, 14, 15]], [[16, 17, 18, 19]], [[20]]
-    ]),
-
-    dict(row_count=1, cpu_count=4, expected=[
-        [[0]]
-    ]),
-
-    dict(row_count=10, cpu_count=4, expected=[
-        [[0, 1], [2, 3, 4], [5, 6], [7, 8, 9]]
-    ]),
-
-    dict(row_count=21, cpu_count=4, expected=[
-        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]],
-        [[16], [17], [18], [19, 20]]
-    ]),
-
-    dict(row_count=10, cpu_count=8, expected=[
-        [[0], [1], [2], [3, 4], [5], [6], [7], [8, 9]]
-    ]),
-
-    dict(row_count=40, cpu_count=8, expected=[
-        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15],
-         [16, 17, 18, 19], [20, 21, 22, 23], [24, 25, 26, 27],
-         [28, 29, 30, 31]],
-        [[32], [33], [34], [35], [36], [37], [38], [39]]
-    ]),
-]
-
-_FEATURE_MAP = {
-    _RAW_ROW: tf.io.FixedLenFeature([1], dtype=tf.int64),
-    _DUMMY_COL: tf.io.FixedLenFeature([1], dtype=tf.int64),
-    _DUMMY_VEC_COL: tf.io.FixedLenFeature([_DUMMY_VEC_LEN], dtype=tf.float32)
-}
-
-
-@contextlib.contextmanager
-def fixed_core_count(cpu_count):
-  """Override CPU count.
-
-  file_io.py uses the cpu_count function to scale to the size of the instance.
-  However, this is not desirable for testing because it can make the test flaky.
-  Instead, this context manager fixes the count for more robust testing.
-
-  Args:
-    cpu_count: How many cores multiprocessing claims to have.
-
-  Yields:
-    Nothing. (for context manager only)
-  """
-  old_count_fn = multiprocessing.cpu_count
-  multiprocessing.cpu_count = lambda: cpu_count
-  yield
-  multiprocessing.cpu_count = old_count_fn
-
-
-class BaseTest(tf.test.TestCase):
-
-  def setUp(self):
-    super(BaseTest, self).setUp()
-    tf.compat.v1.disable_eager_execution()
-
-  def _test_sharding(self, row_count, cpu_count, expected):
-    df = pd.DataFrame({_DUMMY_COL: list(range(row_count))})
-    with fixed_core_count(cpu_count):
-      shards = list(file_io.iter_shard_dataframe(df, _ROWS_PER_CORE))
-    result = [[j[_DUMMY_COL].tolist() for j in i] for i in shards]
-    self.assertAllEqual(expected, result)
-
-  def test_tiny_rows_low_core(self):
-    self._test_sharding(**_TEST_CASES[0])
-
-  def test_small_rows_low_core(self):
-    self._test_sharding(**_TEST_CASES[1])
-
-  def test_large_rows_low_core(self):
-    self._test_sharding(**_TEST_CASES[2])
-
-  def test_tiny_rows_medium_core(self):
-    self._test_sharding(**_TEST_CASES[3])
-
-  def test_small_rows_medium_core(self):
-    self._test_sharding(**_TEST_CASES[4])
-
-  def test_large_rows_medium_core(self):
-    self._test_sharding(**_TEST_CASES[5])
-
-  def test_small_rows_large_core(self):
-    self._test_sharding(**_TEST_CASES[6])
-
-  def test_large_rows_large_core(self):
-    self._test_sharding(**_TEST_CASES[7])
-
-  def _serialize_deserialize(self, num_cores=1, num_rows=20):
-    np.random.seed(1)
-    df = pd.DataFrame({
-        # Serialization order is only deterministic for num_cores=1. raw_row is
-        # used in validation after the deserialization.
-        _RAW_ROW: np.array(range(num_rows), dtype=np.int64),
-        _DUMMY_COL: np.random.randint(0, 35, size=(num_rows,)),
-        _DUMMY_VEC_COL: [
-            np.array([np.random.random() for _ in range(_DUMMY_VEC_LEN)])
-            for i in range(num_rows)  # pylint: disable=unused-variable
-        ]
-    })
-
-    with fixed_core_count(num_cores):
-      buffer_path = file_io.write_to_temp_buffer(
-          df, self.get_temp_dir(), [_RAW_ROW, _DUMMY_COL, _DUMMY_VEC_COL])
-
-    with self.session(graph=tf.Graph()) as sess:
-      dataset = tf.data.TFRecordDataset(buffer_path)
-      dataset = dataset.batch(1).map(
-          lambda x: tf.io.parse_example(serialized=x, features=_FEATURE_MAP))
-
-      data_iter = tf.compat.v1.data.make_one_shot_iterator(dataset)
-      seen_rows = set()
-      for i in range(num_rows+5):
-        row = data_iter.get_next()
-        try:
-          row_id, val_0, val_1 = sess.run(
-              [row[_RAW_ROW], row[_DUMMY_COL], row[_DUMMY_VEC_COL]])
-          row_id, val_0, val_1 = row_id[0][0], val_0[0][0], val_1[0]
-          assert row_id not in seen_rows
-          seen_rows.add(row_id)
-
-          self.assertEqual(val_0, df[_DUMMY_COL][row_id])
-          self.assertAllClose(val_1, df[_DUMMY_VEC_COL][row_id])
-
-          self.assertLess(i, num_rows, msg="Too many rows.")
-        except tf.errors.OutOfRangeError:
-          self.assertGreaterEqual(i, num_rows, msg="Too few rows.")
-
-    file_io._GARBAGE_COLLECTOR.purge()
-    assert not tf.io.gfile.exists(buffer_path)
-
-  def test_serialize_deserialize_0(self):
-    self._serialize_deserialize(num_cores=1)
-
-  def test_serialize_deserialize_1(self):
-    self._serialize_deserialize(num_cores=2)
-
-  def test_serialize_deserialize_2(self):
-    self._serialize_deserialize(num_cores=8)
-
-
-if __name__ == "__main__":
-  tf.test.main()
--- a/official/r1/utils/export.py
+++ b/official/r1/utils/export.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Convenience functions for exporting models as SavedModels or other types."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-
-def build_tensor_serving_input_receiver_fn(shape, dtype=tf.float32,
-                                           batch_size=1):
-  """Returns a input_receiver_fn that can be used during serving.
-
-  This expects examples to come through as float tensors, and simply
-  wraps them as TensorServingInputReceivers.
-
-  Arguably, this should live in tf.estimator.export. Testing here first.
-
-  Args:
-    shape: list representing target size of a single example.
-    dtype: the expected datatype for the input example
-    batch_size: number of input tensors that will be passed for prediction
-
-  Returns:
-    A function that itself returns a TensorServingInputReceiver.
-  """
-  def serving_input_receiver_fn():
-    # Prep a placeholder where the input example will be fed in
-    features = tf.compat.v1.placeholder(
-        dtype=dtype, shape=[batch_size] + shape, name='input_tensor')
-
-    return tf.estimator.export.TensorServingInputReceiver(
-        features=features, receiver_tensors=features)
-
-  return serving_input_receiver_fn
--- a/official/r1/utils/export_test.py
+++ b/official/r1/utils/export_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for exporting utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.r1.utils import export
-
-
-class ExportUtilsTest(tf.test.TestCase):
-  """Tests for the ExportUtils."""
-
-  def test_build_tensor_serving_input_receiver_fn(self):
-    receiver_fn = export.build_tensor_serving_input_receiver_fn(shape=[4, 5])
-    with tf.Graph().as_default():
-      receiver = receiver_fn()
-      self.assertIsInstance(
-          receiver, tf.estimator.export.TensorServingInputReceiver)
-
-      self.assertIsInstance(receiver.features, tf.Tensor)
-      self.assertEqual(receiver.features.shape, tf.TensorShape([1, 4, 5]))
-      self.assertEqual(receiver.features.dtype, tf.float32)
-      self.assertIsInstance(receiver.receiver_tensors, dict)
-      # Note that Python 3 can no longer index .values() directly; cast to list.
-      self.assertEqual(list(receiver.receiver_tensors.values())[0].shape,
-                       tf.TensorShape([1, 4, 5]))
-
-  def test_build_tensor_serving_input_receiver_fn_batch_dtype(self):
-    receiver_fn = export.build_tensor_serving_input_receiver_fn(
-        shape=[4, 5], dtype=tf.int8, batch_size=10)
-
-    with tf.Graph().as_default():
-      receiver = receiver_fn()
-      self.assertIsInstance(
-          receiver, tf.estimator.export.TensorServingInputReceiver)
-
-      self.assertIsInstance(receiver.features, tf.Tensor)
-      self.assertEqual(receiver.features.shape, tf.TensorShape([10, 4, 5]))
-      self.assertEqual(receiver.features.dtype, tf.int8)
-      self.assertIsInstance(receiver.receiver_tensors, dict)
-      # Note that Python 3 can no longer index .values() directly; cast to list.
-      self.assertEqual(list(receiver.receiver_tensors.values())[0].shape,
-                       tf.TensorShape([10, 4, 5]))
-
-
-if __name__ == "__main__":
-  tf.test.main()
--- a/official/r1/utils/logs/__init__.py
+++ b/official/r1/utils/logs/__init__.py
--- a/official/r1/utils/logs/cloud_lib.py
+++ b/official/r1/utils/logs/cloud_lib.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Utilities that interact with cloud service.
-"""
-
-import requests
-
-GCP_METADATA_URL = "http://metadata/computeMetadata/v1/instance/hostname"
-GCP_METADATA_HEADER = {"Metadata-Flavor": "Google"}
-
-
-def on_gcp():
-  """Detect whether the current running environment is on GCP."""
-  try:
-    # Timeout in 5 seconds, in case the test environment has connectivity issue.
-    # There is not default timeout, which means it might block forever.
-    response = requests.get(
-        GCP_METADATA_URL, headers=GCP_METADATA_HEADER, timeout=5)
-    return response.status_code == 200
-  except requests.exceptions.RequestException:
-    return False
--- a/official/r1/utils/logs/guidelines.md
+++ b/official/r1/utils/logs/guidelines.md
-# Logging in official models
-
-This library adds logging functions that print or save tensor values. Official models should define all common hooks
-(using hooks helper) and a benchmark logger.
-
-1. **Training Hooks**
-
-   Hooks are a TensorFlow concept that define specific actions at certain points of the execution. We use them to obtain and log
-   tensor values during training.
-
-   hooks_helper.py provides an easy way to create common hooks. The following hooks are currently defined:
-   * LoggingTensorHook: Logs tensor values
-   * ProfilerHook: Writes a timeline json that can be loaded into chrome://tracing.
-   * ExamplesPerSecondHook: Logs the number of examples processed per second.
-   * LoggingMetricHook: Similar to LoggingTensorHook, except that the tensors are logged in a format defined by our data
-     anaylsis pipeline.
-
-
-2. **Benchmarks**
-
-   The benchmark logger provides useful functions for logging environment information, and evaluation results.
-   The module also contains a context which is used to update the status of the run.
-
-Example usage:
-
-```
-from absl import app as absl_app
-
-from official.utils.logs import hooks_helper
-from official.utils.logs import logger
-
-def model_main(flags_obj):
-  estimator = ...
-
-  benchmark_logger = logger.get_benchmark_logger()
-  benchmark_logger.log_run_info(...)
-
-  train_hooks = hooks_helper.get_train_hooks(...)
-
-  for epoch in range(10):
-    estimator.train(..., hooks=train_hooks)
-    eval_results = estimator.evaluate(...)
-
-    # Log a dictionary of metrics
-    benchmark_logger.log_evaluation_result(eval_results)
-
-    # Log an individual metric
-    benchmark_logger.log_metric(...)
-
-
-def main(_):
-  with logger.benchmark_context(flags.FLAGS):
-    model_main(flags.FLAGS)
-
-if __name__ == "__main__":
-  # define flags
-  absl_app.run(main)
-```
--- a/official/r1/utils/logs/hooks.py
+++ b/official/r1/utils/logs/hooks.py
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Hook that counts examples per second every N steps or seconds."""
-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.r1.utils.logs import logger
-
-
-class ExamplesPerSecondHook(tf.estimator.SessionRunHook):
-  """Hook to print out examples per second.
-
-  Total time is tracked and then divided by the total number of steps
-  to get the average step time and then batch_size is used to determine
-  the running average of examples per second. The examples per second for the
-  most recent interval is also logged.
-  """
-
-  def __init__(self,
-               batch_size,
-               every_n_steps=None,
-               every_n_secs=None,
-               warm_steps=0,
-               metric_logger=None):
-    """Initializer for ExamplesPerSecondHook.
-
-    Args:
-      batch_size: Total batch size across all workers used to calculate
-        examples/second from global time.
-      every_n_steps: Log stats every n steps.
-      every_n_secs: Log stats every n seconds. Exactly one of the
-        `every_n_steps` or `every_n_secs` should be set.
-      warm_steps: The number of steps to be skipped before logging and running
-        average calculation. warm_steps steps refers to global steps across all
-        workers, not on each worker
-      metric_logger: instance of `BenchmarkLogger`, the benchmark logger that
-          hook should use to write the log. If None, BaseBenchmarkLogger will
-          be used.
-
-    Raises:
-      ValueError: if neither `every_n_steps` or `every_n_secs` is set, or
-      both are set.
-    """
-
-    if (every_n_steps is None) == (every_n_secs is None):
-      raise ValueError("exactly one of every_n_steps"
-                       " and every_n_secs should be provided.")
-
-    self._logger = metric_logger or logger.BaseBenchmarkLogger()
-
-    self._timer = tf.estimator.SecondOrStepTimer(
-        every_steps=every_n_steps, every_secs=every_n_secs)
-
-    self._step_train_time = 0
-    self._total_steps = 0
-    self._batch_size = batch_size
-    self._warm_steps = warm_steps
-    # List of examples per second logged every_n_steps.
-    self.current_examples_per_sec_list = []
-
-  def begin(self):
-    """Called once before using the session to check global step."""
-    self._global_step_tensor = tf.compat.v1.train.get_global_step()
-    if self._global_step_tensor is None:
-      raise RuntimeError(
-          "Global step should be created to use StepCounterHook.")
-
-  def before_run(self, run_context):  # pylint: disable=unused-argument
-    """Called before each call to run().
-
-    Args:
-      run_context: A SessionRunContext object.
-
-    Returns:
-      A SessionRunArgs object or None if never triggered.
-    """
-    return tf.estimator.SessionRunArgs(self._global_step_tensor)
-
-  def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
-    """Called after each call to run().
-
-    Args:
-      run_context: A SessionRunContext object.
-      run_values: A SessionRunValues object.
-    """
-    global_step = run_values.results
-
-    if self._timer.should_trigger_for_step(
-        global_step) and global_step > self._warm_steps:
-      elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(
-          global_step)
-      if elapsed_time is not None:
-        self._step_train_time += elapsed_time
-        self._total_steps += elapsed_steps
-
-        # average examples per second is based on the total (accumulative)
-        # training steps and training time so far
-        average_examples_per_sec = self._batch_size * (
-            self._total_steps / self._step_train_time)
-        # current examples per second is based on the elapsed training steps
-        # and training time per batch
-        current_examples_per_sec = self._batch_size * (
-            elapsed_steps / elapsed_time)
-        # Logs entries to be read from hook during or after run.
-        self.current_examples_per_sec_list.append(current_examples_per_sec)
-        self._logger.log_metric(
-            "average_examples_per_sec", average_examples_per_sec,
-            global_step=global_step)
-
-        self._logger.log_metric(
-            "current_examples_per_sec", current_examples_per_sec,
-            global_step=global_step)
--- a/official/r1/utils/logs/hooks_helper.py
+++ b/official/r1/utils/logs/hooks_helper.py
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Hooks helper to return a list of TensorFlow hooks for training by name.
-
-More hooks can be added to this set. To add a new hook, 1) add the new hook to
-the registry in HOOKS, 2) add a corresponding function that parses out necessary
-parameters.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-from absl import logging
-
-from official.r1.utils.logs import hooks
-from official.r1.utils.logs import logger
-from official.r1.utils.logs import metric_hook
-
-_TENSORS_TO_LOG = dict((x, x) for x in ['learning_rate',
-                                        'cross_entropy',
-                                        'train_accuracy'])
-
-
-def get_train_hooks(name_list, use_tpu=False, **kwargs):
-  """Factory for getting a list of TensorFlow hooks for training by name.
-
-  Args:
-    name_list: a list of strings to name desired hook classes. Allowed:
-      LoggingTensorHook, ProfilerHook, ExamplesPerSecondHook, which are defined
-      as keys in HOOKS
-    use_tpu: Boolean of whether computation occurs on a TPU. This will disable
-      hooks altogether.
-    **kwargs: a dictionary of arguments to the hooks.
-
-  Returns:
-    list of instantiated hooks, ready to be used in a classifier.train call.
-
-  Raises:
-    ValueError: if an unrecognized name is passed.
-  """
-
-  if not name_list:
-    return []
-
-  if use_tpu:
-    logging.warning(
-        'hooks_helper received name_list `%s`, but a '
-        'TPU is specified. No hooks will be used.', name_list)
-    return []
-
-  train_hooks = []
-  for name in name_list:
-    hook_name = HOOKS.get(name.strip().lower())
-    if hook_name is None:
-      raise ValueError('Unrecognized training hook requested: {}'.format(name))
-    else:
-      train_hooks.append(hook_name(**kwargs))
-
-  return train_hooks
-
-
-def get_logging_tensor_hook(every_n_iter=100, tensors_to_log=None, **kwargs):  # pylint: disable=unused-argument
-  """Function to get LoggingTensorHook.
-
-  Args:
-    every_n_iter: `int`, print the values of `tensors` once every N local
-      steps taken on the current worker.
-    tensors_to_log: List of tensor names or dictionary mapping labels to tensor
-      names. If not set, log _TENSORS_TO_LOG by default.
-    **kwargs: a dictionary of arguments to LoggingTensorHook.
-
-  Returns:
-    Returns a LoggingTensorHook with a standard set of tensors that will be
-    printed to stdout.
-  """
-  if tensors_to_log is None:
-    tensors_to_log = _TENSORS_TO_LOG
-
-  return tf.estimator.LoggingTensorHook(
-      tensors=tensors_to_log,
-      every_n_iter=every_n_iter)
-
-
-def get_profiler_hook(model_dir, save_steps=1000, **kwargs):  # pylint: disable=unused-argument
-  """Function to get ProfilerHook.
-
-  Args:
-    model_dir: The directory to save the profile traces to.
-    save_steps: `int`, print profile traces every N steps.
-    **kwargs: a dictionary of arguments to ProfilerHook.
-
-  Returns:
-    Returns a ProfilerHook that writes out timelines that can be loaded into
-    profiling tools like chrome://tracing.
-  """
-  return tf.estimator.ProfilerHook(save_steps=save_steps, output_dir=model_dir)
-
-
-def get_examples_per_second_hook(every_n_steps=100,
-                                 batch_size=128,
-                                 warm_steps=5,
-                                 **kwargs):  # pylint: disable=unused-argument
-  """Function to get ExamplesPerSecondHook.
-
-  Args:
-    every_n_steps: `int`, print current and average examples per second every
-      N steps.
-    batch_size: `int`, total batch size used to calculate examples/second from
-      global time.
-    warm_steps: skip this number of steps before logging and running average.
-    **kwargs: a dictionary of arguments to ExamplesPerSecondHook.
-
-  Returns:
-    Returns a ProfilerHook that writes out timelines that can be loaded into
-    profiling tools like chrome://tracing.
-  """
-  return hooks.ExamplesPerSecondHook(
-      batch_size=batch_size, every_n_steps=every_n_steps,
-      warm_steps=warm_steps, metric_logger=logger.get_benchmark_logger())
-
-
-def get_logging_metric_hook(tensors_to_log=None,
-                            every_n_secs=600,
-                            **kwargs):  # pylint: disable=unused-argument
-  """Function to get LoggingMetricHook.
-
-  Args:
-    tensors_to_log: List of tensor names or dictionary mapping labels to tensor
-      names. If not set, log _TENSORS_TO_LOG by default.
-    every_n_secs: `int`, the frequency for logging the metric. Default to every
-      10 mins.
-    **kwargs: a dictionary of arguments.
-
-  Returns:
-    Returns a LoggingMetricHook that saves tensor values in a JSON format.
-  """
-  if tensors_to_log is None:
-    tensors_to_log = _TENSORS_TO_LOG
-  return metric_hook.LoggingMetricHook(
-      tensors=tensors_to_log,
-      metric_logger=logger.get_benchmark_logger(),
-      every_n_secs=every_n_secs)
-
-
-def get_step_counter_hook(**kwargs):
-  """Function to get StepCounterHook."""
-  del kwargs
-  return tf.estimator.StepCounterHook()
-
-
-# A dictionary to map one hook name and its corresponding function
-HOOKS = {
-    'loggingtensorhook': get_logging_tensor_hook,
-    'profilerhook': get_profiler_hook,
-    'examplespersecondhook': get_examples_per_second_hook,
-    'loggingmetrichook': get_logging_metric_hook,
-    'stepcounterhook': get_step_counter_hook
-}
--- a/official/r1/utils/logs/hooks_test.py
+++ b/official/r1/utils/logs/hooks_test.py
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for hooks."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import time
-
-from absl import logging
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.r1.utils.logs import hooks
-from official.r1.utils.logs import mock_lib
-
-logging.set_verbosity(logging.DEBUG)
-
-
-class ExamplesPerSecondHookTest(tf.test.TestCase):
-  """Tests for the ExamplesPerSecondHook.
-
-  In the test, we explicitly run global_step tensor after train_op in order to
-  keep the global_step value and the train_op (which increase the glboal_step
-  by 1) consistent. This is to correct the discrepancies in reported global_step
-  value when running on GPUs.
-  """
-
-  def setUp(self):
-    """Mock out logging calls to verify if correct info is being monitored."""
-    self._logger = mock_lib.MockBenchmarkLogger()
-
-    self.graph = tf.Graph()
-    with self.graph.as_default():
-      tf.compat.v1.train.create_global_step()
-      self.train_op = tf.compat.v1.assign_add(
-          tf.compat.v1.train.get_global_step(), 1)
-      self.global_step = tf.compat.v1.train.get_global_step()
-
-  def test_raise_in_both_secs_and_steps(self):
-    with self.assertRaises(ValueError):
-      hooks.ExamplesPerSecondHook(
-          batch_size=256,
-          every_n_steps=10,
-          every_n_secs=20,
-          metric_logger=self._logger)
-
-  def test_raise_in_none_secs_and_steps(self):
-    with self.assertRaises(ValueError):
-      hooks.ExamplesPerSecondHook(
-          batch_size=256,
-          every_n_steps=None,
-          every_n_secs=None,
-          metric_logger=self._logger)
-
-  def _validate_log_every_n_steps(self, every_n_steps, warm_steps):
-    hook = hooks.ExamplesPerSecondHook(
-        batch_size=256,
-        every_n_steps=every_n_steps,
-        warm_steps=warm_steps,
-        metric_logger=self._logger)
-
-    with tf.compat.v1.train.MonitoredSession(
-        tf.compat.v1.train.ChiefSessionCreator(), [hook]) as mon_sess:
-      for _ in range(every_n_steps):
-        # Explicitly run global_step after train_op to get the accurate
-        # global_step value
-        mon_sess.run(self.train_op)
-        mon_sess.run(self.global_step)
-        # Nothing should be in the list yet
-        self.assertFalse(self._logger.logged_metric)
-
-      mon_sess.run(self.train_op)
-      global_step_val = mon_sess.run(self.global_step)
-
-      if global_step_val > warm_steps:
-        self._assert_metrics()
-      else:
-        # Nothing should be in the list yet
-        self.assertFalse(self._logger.logged_metric)
-
-      # Add additional run to verify proper reset when called multiple times.
-      prev_log_len = len(self._logger.logged_metric)
-      mon_sess.run(self.train_op)
-      global_step_val = mon_sess.run(self.global_step)
-
-      if every_n_steps == 1 and global_step_val > warm_steps:
-        # Each time, we log two additional metrics. Did exactly 2 get added?
-        self.assertEqual(len(self._logger.logged_metric), prev_log_len + 2)
-      else:
-        # No change in the size of the metric list.
-        self.assertEqual(len(self._logger.logged_metric), prev_log_len)
-
-  def test_examples_per_sec_every_1_steps(self):
-    with self.graph.as_default():
-      self._validate_log_every_n_steps(1, 0)
-
-  def test_examples_per_sec_every_5_steps(self):
-    with self.graph.as_default():
-      self._validate_log_every_n_steps(5, 0)
-
-  def test_examples_per_sec_every_1_steps_with_warm_steps(self):
-    with self.graph.as_default():
-      self._validate_log_every_n_steps(1, 10)
-
-  def test_examples_per_sec_every_5_steps_with_warm_steps(self):
-    with self.graph.as_default():
-      self._validate_log_every_n_steps(5, 10)
-
-  def _validate_log_every_n_secs(self, every_n_secs):
-    hook = hooks.ExamplesPerSecondHook(
-        batch_size=256,
-        every_n_steps=None,
-        every_n_secs=every_n_secs,
-        metric_logger=self._logger)
-
-    with tf.compat.v1.train.MonitoredSession(
-        tf.compat.v1.train.ChiefSessionCreator(), [hook]) as mon_sess:
-      # Explicitly run global_step after train_op to get the accurate
-      # global_step value
-      mon_sess.run(self.train_op)
-      mon_sess.run(self.global_step)
-      # Nothing should be in the list yet
-      self.assertFalse(self._logger.logged_metric)
-      time.sleep(every_n_secs)
-
-      mon_sess.run(self.train_op)
-      mon_sess.run(self.global_step)
-      self._assert_metrics()
-
-  def test_examples_per_sec_every_1_secs(self):
-    with self.graph.as_default():
-      self._validate_log_every_n_secs(1)
-
-  def test_examples_per_sec_every_5_secs(self):
-    with self.graph.as_default():
-      self._validate_log_every_n_secs(5)
-
-  def _assert_metrics(self):
-    metrics = self._logger.logged_metric
-    self.assertEqual(metrics[-2]["name"], "average_examples_per_sec")
-    self.assertEqual(metrics[-1]["name"], "current_examples_per_sec")
-
-
-if __name__ == "__main__":
-  tf.test.main()
--- a/official/r1/utils/logs/logger.py
+++ b/official/r1/utils/logs/logger.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Logging utilities for benchmark.
-
-For collecting local environment metrics like CPU and memory, certain python
-packages need be installed. See README for details.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import contextlib
-import datetime
-import json
-import numbers
-import os
-import threading
-import uuid
-
-from absl import flags
-from absl import logging
-from six.moves import _thread as thread
-import tensorflow as tf
-from tensorflow.python.client import device_lib
-from official.r1.utils.logs import cloud_lib
-
-METRIC_LOG_FILE_NAME = "metric.log"
-BENCHMARK_RUN_LOG_FILE_NAME = "benchmark_run.log"
-_DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ"
-GCP_TEST_ENV = "GCP"
-RUN_STATUS_SUCCESS = "success"
-RUN_STATUS_FAILURE = "failure"
-RUN_STATUS_RUNNING = "running"
-
-
-FLAGS = flags.FLAGS
-
-# Don't use it directly. Use get_benchmark_logger to access a logger.
-_benchmark_logger = None
-_logger_lock = threading.Lock()
-
-
-def config_benchmark_logger(flag_obj=None):
-  """Config the global benchmark logger."""
-  _logger_lock.acquire()
-  try:
-    global _benchmark_logger
-    if not flag_obj:
-      flag_obj = FLAGS
-
-    if (not hasattr(flag_obj, "benchmark_logger_type") or
-        flag_obj.benchmark_logger_type == "BaseBenchmarkLogger"):
-      _benchmark_logger = BaseBenchmarkLogger()
-    elif flag_obj.benchmark_logger_type == "BenchmarkFileLogger":
-      _benchmark_logger = BenchmarkFileLogger(flag_obj.benchmark_log_dir)
-    else:
-      raise ValueError("Unrecognized benchmark_logger_type: %s"
-                       % flag_obj.benchmark_logger_type)
-
-  finally:
-    _logger_lock.release()
-  return _benchmark_logger
-
-
-def get_benchmark_logger():
-  if not _benchmark_logger:
-    config_benchmark_logger()
-  return _benchmark_logger
-
-
-@contextlib.contextmanager
-def benchmark_context(flag_obj):
-  """Context of benchmark, which will update status of the run accordingly."""
-  benchmark_logger = config_benchmark_logger(flag_obj)
-  try:
-    yield
-    benchmark_logger.on_finish(RUN_STATUS_SUCCESS)
-  except Exception:  # pylint: disable=broad-except
-    # Catch all the exception, update the run status to be failure, and re-raise
-    benchmark_logger.on_finish(RUN_STATUS_FAILURE)
-    raise
-
-
-class BaseBenchmarkLogger(object):
-  """Class to log the benchmark information to STDOUT."""
-
-  def log_evaluation_result(self, eval_results):
-    """Log the evaluation result.
-
-    The evaluate result is a dictionary that contains metrics defined in
-    model_fn. It also contains a entry for global_step which contains the value
-    of the global step when evaluation was performed.
-
-    Args:
-      eval_results: dict, the result of evaluate.
-    """
-    if not isinstance(eval_results, dict):
-      logging.warning("eval_results should be dictionary for logging. Got %s",
-                      type(eval_results))
-      return
-    global_step = eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP]
-    for key in sorted(eval_results):
-      if key != tf.compat.v1.GraphKeys.GLOBAL_STEP:
-        self.log_metric(key, eval_results[key], global_step=global_step)
-
-  def log_metric(self, name, value, unit=None, global_step=None, extras=None):
-    """Log the benchmark metric information to local file.
-
-    Currently the logging is done in a synchronized way. This should be updated
-    to log asynchronously.
-
-    Args:
-      name: string, the name of the metric to log.
-      value: number, the value of the metric. The value will not be logged if it
-        is not a number type.
-      unit: string, the unit of the metric, E.g "image per second".
-      global_step: int, the global_step when the metric is logged.
-      extras: map of string:string, the extra information about the metric.
-    """
-    metric = _process_metric_to_json(name, value, unit, global_step, extras)
-    if metric:
-      logging.info("Benchmark metric: %s", metric)
-
-  def log_run_info(self, model_name, dataset_name, run_params, test_id=None):
-    logging.info(
-        "Benchmark run: %s",
-        _gather_run_info(model_name, dataset_name, run_params, test_id))
-
-  def on_finish(self, status):
-    pass
-
-
-class BenchmarkFileLogger(BaseBenchmarkLogger):
-  """Class to log the benchmark information to local disk."""
-
-  def __init__(self, logging_dir):
-    super(BenchmarkFileLogger, self).__init__()
-    self._logging_dir = logging_dir
-    if not tf.io.gfile.isdir(self._logging_dir):
-      tf.io.gfile.makedirs(self._logging_dir)
-    self._metric_file_handler = tf.io.gfile.GFile(
-        os.path.join(self._logging_dir, METRIC_LOG_FILE_NAME), "a")
-
-  def log_metric(self, name, value, unit=None, global_step=None, extras=None):
-    """Log the benchmark metric information to local file.
-
-    Currently the logging is done in a synchronized way. This should be updated
-    to log asynchronously.
-
-    Args:
-      name: string, the name of the metric to log.
-      value: number, the value of the metric. The value will not be logged if it
-        is not a number type.
-      unit: string, the unit of the metric, E.g "image per second".
-      global_step: int, the global_step when the metric is logged.
-      extras: map of string:string, the extra information about the metric.
-    """
-    metric = _process_metric_to_json(name, value, unit, global_step, extras)
-    if metric:
-      try:
-        json.dump(metric, self._metric_file_handler)
-        self._metric_file_handler.write("\n")
-        self._metric_file_handler.flush()
-      except (TypeError, ValueError) as e:
-        logging.warning(
-            "Failed to dump metric to log file: name %s, value %s, error %s",
-            name, value, e)
-
-  def log_run_info(self, model_name, dataset_name, run_params, test_id=None):
-    """Collect most of the TF runtime information for the local env.
-
-    The schema of the run info follows official/benchmark/datastore/schema.
-
-    Args:
-      model_name: string, the name of the model.
-      dataset_name: string, the name of dataset for training and evaluation.
-      run_params: dict, the dictionary of parameters for the run, it could
-        include hyperparameters or other params that are important for the run.
-      test_id: string, the unique name of the test run by the combination of key
-        parameters, eg batch size, num of GPU. It is hardware independent.
-    """
-    run_info = _gather_run_info(model_name, dataset_name, run_params, test_id)
-
-    with tf.io.gfile.GFile(os.path.join(
-        self._logging_dir, BENCHMARK_RUN_LOG_FILE_NAME), "w") as f:
-      try:
-        json.dump(run_info, f)
-        f.write("\n")
-      except (TypeError, ValueError) as e:
-        logging.warning("Failed to dump benchmark run info to log file: %s", e)
-
-  def on_finish(self, status):
-    self._metric_file_handler.flush()
-    self._metric_file_handler.close()
-
-
-def _gather_run_info(model_name, dataset_name, run_params, test_id):
-  """Collect the benchmark run information for the local environment."""
-  run_info = {
-      "model_name": model_name,
-      "dataset": {"name": dataset_name},
-      "machine_config": {},
-      "test_id": test_id,
-      "run_date": datetime.datetime.utcnow().strftime(
-          _DATE_TIME_FORMAT_PATTERN)}
-  _collect_tensorflow_info(run_info)
-  _collect_tensorflow_environment_variables(run_info)
-  _collect_run_params(run_info, run_params)
-  _collect_memory_info(run_info)
-  _collect_test_environment(run_info)
-  return run_info
-
-
-def _process_metric_to_json(
-    name, value, unit=None, global_step=None, extras=None):
-  """Validate the metric data and generate JSON for insert."""
-  if not isinstance(value, numbers.Number):
-    logging.warning("Metric value to log should be a number. Got %s",
-                    type(value))
-    return None
-
-  extras = _convert_to_json_dict(extras)
-  return {
-      "name": name,
-      "value": float(value),
-      "unit": unit,
-      "global_step": global_step,
-      "timestamp": datetime.datetime.utcnow().strftime(
-          _DATE_TIME_FORMAT_PATTERN),
-      "extras": extras}
-
-
-def _collect_tensorflow_info(run_info):
-  run_info["tensorflow_version"] = {
-      "version": tf.version.VERSION, "git_hash": tf.version.GIT_VERSION}
-
-
-def _collect_run_params(run_info, run_params):
-  """Log the parameter information for the benchmark run."""
-  def process_param(name, value):
-    type_check = {
-        str: {"name": name, "string_value": value},
-        int: {"name": name, "long_value": value},
-        bool: {"name": name, "bool_value": str(value)},
-        float: {"name": name, "float_value": value},
-    }
-    return type_check.get(type(value),
-                          {"name": name, "string_value": str(value)})
-  if run_params:
-    run_info["run_parameters"] = [
-        process_param(k, v) for k, v in sorted(run_params.items())]
-
-
-def _collect_tensorflow_environment_variables(run_info):
-  run_info["tensorflow_environment_variables"] = [
-      {"name": k, "value": v}
-      for k, v in sorted(os.environ.items()) if k.startswith("TF_")]
-
-
-def _collect_memory_info(run_info):
-  try:
-    # Note: psutil is not installed in the TensorFlow OSS tree.
-    # It is installable via pip.
-    import psutil   # pylint: disable=g-import-not-at-top
-    vmem = psutil.virtual_memory()
-    run_info["machine_config"]["memory_total"] = vmem.total
-    run_info["machine_config"]["memory_available"] = vmem.available
-  except ImportError:
-    logging.warn("'psutil' not imported. Memory info will not be logged.")
-
-
-def _collect_test_environment(run_info):
-  """Detect the local environment, eg GCE, AWS or DGX, etc."""
-  if cloud_lib.on_gcp():
-    run_info["test_environment"] = GCP_TEST_ENV
-  # TODO(scottzhu): Add more testing env detection for other platform
-
-
-def _parse_gpu_model(physical_device_desc):
-  # Assume all the GPU connected are same model
-  for kv in physical_device_desc.split(","):
-    k, _, v = kv.partition(":")
-    if k.strip() == "name":
-      return v.strip()
-  return None
-
-
-def _convert_to_json_dict(input_dict):
-  if input_dict:
-    return [{"name": k, "value": v} for k, v in sorted(input_dict.items())]
-  else:
-    return []
--- a/official/r1/utils/logs/logger_test.py
+++ b/official/r1/utils/logs/logger_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for benchmark logger."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import json
-import os
-import tempfile
-import time
-import unittest
-
-from absl import logging
-from absl.testing import flagsaver
-import tensorflow as tf
-
-from official.r1.utils.logs import logger
-from official.utils.flags import core as flags_core
-from official.utils.misc import keras_utils
-
-
-class BenchmarkLoggerTest(tf.test.TestCase):
-
-  @classmethod
-  def setUpClass(cls):  # pylint: disable=invalid-name
-    super(BenchmarkLoggerTest, cls).setUpClass()
-    flags_core.define_benchmark()
-
-  def test_get_default_benchmark_logger(self):
-    with flagsaver.flagsaver(benchmark_logger_type="foo"):
-      self.assertIsInstance(logger.get_benchmark_logger(),
-                            logger.BaseBenchmarkLogger)
-
-  def test_config_base_benchmark_logger(self):
-    with flagsaver.flagsaver(benchmark_logger_type="BaseBenchmarkLogger"):
-      logger.config_benchmark_logger()
-      self.assertIsInstance(logger.get_benchmark_logger(),
-                            logger.BaseBenchmarkLogger)
-
-  def test_config_benchmark_file_logger(self):
-    # Set the benchmark_log_dir first since the benchmark_logger_type will need
-    # the value to be set when it does the validation.
-    with flagsaver.flagsaver(benchmark_log_dir="/tmp"):
-      with flagsaver.flagsaver(benchmark_logger_type="BenchmarkFileLogger"):
-        logger.config_benchmark_logger()
-        self.assertIsInstance(logger.get_benchmark_logger(),
-                              logger.BenchmarkFileLogger)
-
-
-class BaseBenchmarkLoggerTest(tf.test.TestCase):
-
-  def setUp(self):
-    super(BaseBenchmarkLoggerTest, self).setUp()
-    self._actual_log = logging.info
-    self.logged_message = None
-
-    def mock_log(*args, **kwargs):
-      self.logged_message = args
-      self._actual_log(*args, **kwargs)
-
-    logging.info = mock_log
-
-  def tearDown(self):
-    super(BaseBenchmarkLoggerTest, self).tearDown()
-    logging.info = self._actual_log
-
-  def test_log_metric(self):
-    log = logger.BaseBenchmarkLogger()
-    log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
-
-    expected_log_prefix = "Benchmark metric:"
-    self.assertRegexpMatches(str(self.logged_message), expected_log_prefix)
-
-
-class BenchmarkFileLoggerTest(tf.test.TestCase):
-
-  def setUp(self):
-    super(BenchmarkFileLoggerTest, self).setUp()
-    # Avoid pulling extra env vars from test environment which affects the test
-    # result, eg. Kokoro test has a TF_PKG env which affect the test case
-    # test_collect_tensorflow_environment_variables()
-    self.original_environ = dict(os.environ)
-    os.environ.clear()
-
-  def tearDown(self):
-    super(BenchmarkFileLoggerTest, self).tearDown()
-    tf.io.gfile.rmtree(self.get_temp_dir())
-    os.environ.clear()
-    os.environ.update(self.original_environ)
-
-  def test_create_logging_dir(self):
-    non_exist_temp_dir = os.path.join(self.get_temp_dir(), "unknown_dir")
-    self.assertFalse(tf.io.gfile.isdir(non_exist_temp_dir))
-
-    logger.BenchmarkFileLogger(non_exist_temp_dir)
-    self.assertTrue(tf.io.gfile.isdir(non_exist_temp_dir))
-
-  def test_log_metric(self):
-    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkFileLogger(log_dir)
-    log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
-
-    metric_log = os.path.join(log_dir, "metric.log")
-    self.assertTrue(tf.io.gfile.exists(metric_log))
-    with tf.io.gfile.GFile(metric_log) as f:
-      metric = json.loads(f.readline())
-      self.assertEqual(metric["name"], "accuracy")
-      self.assertEqual(metric["value"], 0.999)
-      self.assertEqual(metric["unit"], None)
-      self.assertEqual(metric["global_step"], 1e4)
-      self.assertEqual(metric["extras"], [{"name": "name", "value": "value"}])
-
-  def test_log_multiple_metrics(self):
-    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkFileLogger(log_dir)
-    log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
-    log.log_metric("loss", 0.02, global_step=1e4)
-
-    metric_log = os.path.join(log_dir, "metric.log")
-    self.assertTrue(tf.io.gfile.exists(metric_log))
-    with tf.io.gfile.GFile(metric_log) as f:
-      accuracy = json.loads(f.readline())
-      self.assertEqual(accuracy["name"], "accuracy")
-      self.assertEqual(accuracy["value"], 0.999)
-      self.assertEqual(accuracy["unit"], None)
-      self.assertEqual(accuracy["global_step"], 1e4)
-      self.assertEqual(accuracy["extras"], [{"name": "name", "value": "value"}])
-
-      loss = json.loads(f.readline())
-      self.assertEqual(loss["name"], "loss")
-      self.assertEqual(loss["value"], 0.02)
-      self.assertEqual(loss["unit"], None)
-      self.assertEqual(loss["global_step"], 1e4)
-      self.assertEqual(loss["extras"], [])
-
-  def test_log_non_number_value(self):
-    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkFileLogger(log_dir)
-    const = tf.constant(1)
-    log.log_metric("accuracy", const)
-
-    metric_log = os.path.join(log_dir, "metric.log")
-    self.assertFalse(tf.io.gfile.exists(metric_log))
-
-  def test_log_evaluation_result(self):
-    eval_result = {"loss": 0.46237424,
-                   "global_step": 207082,
-                   "accuracy": 0.9285}
-    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkFileLogger(log_dir)
-    log.log_evaluation_result(eval_result)
-
-    metric_log = os.path.join(log_dir, "metric.log")
-    self.assertTrue(tf.io.gfile.exists(metric_log))
-    with tf.io.gfile.GFile(metric_log) as f:
-      accuracy = json.loads(f.readline())
-      self.assertEqual(accuracy["name"], "accuracy")
-      self.assertEqual(accuracy["value"], 0.9285)
-      self.assertEqual(accuracy["unit"], None)
-      self.assertEqual(accuracy["global_step"], 207082)
-
-      loss = json.loads(f.readline())
-      self.assertEqual(loss["name"], "loss")
-      self.assertEqual(loss["value"], 0.46237424)
-      self.assertEqual(loss["unit"], None)
-      self.assertEqual(loss["global_step"], 207082)
-
-  def test_log_evaluation_result_with_invalid_type(self):
-    eval_result = "{'loss': 0.46237424, 'global_step': 207082}"
-    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkFileLogger(log_dir)
-    log.log_evaluation_result(eval_result)
-
-    metric_log = os.path.join(log_dir, "metric.log")
-    self.assertFalse(tf.io.gfile.exists(metric_log))
-
-  def test_collect_tensorflow_info(self):
-    run_info = {}
-    logger._collect_tensorflow_info(run_info)
-    self.assertNotEqual(run_info["tensorflow_version"], {})
-    self.assertEqual(run_info["tensorflow_version"]["version"],
-                     tf.version.VERSION)
-    self.assertEqual(run_info["tensorflow_version"]["git_hash"],
-                     tf.version.GIT_VERSION)
-
-  def test_collect_run_params(self):
-    run_info = {}
-    run_parameters = {
-        "batch_size": 32,
-        "synthetic_data": True,
-        "train_epochs": 100.00,
-        "dtype": "fp16",
-        "resnet_size": 50,
-        "random_tensor": tf.constant(2.0)
-    }
-    logger._collect_run_params(run_info, run_parameters)
-    self.assertEqual(len(run_info["run_parameters"]), 6)
-    self.assertEqual(run_info["run_parameters"][0],
-                     {"name": "batch_size", "long_value": 32})
-    self.assertEqual(run_info["run_parameters"][1],
-                     {"name": "dtype", "string_value": "fp16"})
-    v1_tensor = {"name": "random_tensor", "string_value":
-                     "Tensor(\"Const:0\", shape=(), dtype=float32)"}
-    v2_tensor = {"name": "random_tensor", "string_value":
-                     "tf.Tensor(2.0, shape=(), dtype=float32)"}
-    self.assertIn(run_info["run_parameters"][2], [v1_tensor, v2_tensor])
-
-
-    self.assertEqual(run_info["run_parameters"][3],
-                     {"name": "resnet_size", "long_value": 50})
-    self.assertEqual(run_info["run_parameters"][4],
-                     {"name": "synthetic_data", "bool_value": "True"})
-    self.assertEqual(run_info["run_parameters"][5],
-                     {"name": "train_epochs", "float_value": 100.00})
-
-  def test_collect_tensorflow_environment_variables(self):
-    os.environ["TF_ENABLE_WINOGRAD_NONFUSED"] = "1"
-    os.environ["TF_OTHER"] = "2"
-    os.environ["OTHER"] = "3"
-
-    run_info = {}
-    logger._collect_tensorflow_environment_variables(run_info)
-    self.assertIsNotNone(run_info["tensorflow_environment_variables"])
-    expected_tf_envs = [
-        {"name": "TF_ENABLE_WINOGRAD_NONFUSED", "value": "1"},
-        {"name": "TF_OTHER", "value": "2"},
-    ]
-    self.assertEqual(run_info["tensorflow_environment_variables"],
-                     expected_tf_envs)
-
-  def test_collect_memory_info(self):
-    run_info = {"machine_config": {}}
-    logger._collect_memory_info(run_info)
-    self.assertIsNotNone(run_info["machine_config"]["memory_total"])
-    self.assertIsNotNone(run_info["machine_config"]["memory_available"])
-
-
-if __name__ == "__main__":
-  tf.test.main()