Commit 7f99c1c3 authored by huchen's avatar huchen

Merge branch 'dtk21.10.1_v1' into 'main'

update some TF file

See merge request dcutoolkit/deeplearing/dlexamples_new!5
parents 6b6f8b0c cf66c525
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "vXLA5InzXydn"
},
"source": [
"##### Copyright 2021 The TensorFlow Authors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "RuRlpLL-X0R_"
},
"outputs": [],
"source": [
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fsACVQpVSifi"
},
"source": [
"### Install the TensorFlow Model Garden pip package\n",
"\n",
"* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes from the `tensorflow_models` GitHub repo. To include the latest changes, you may install `tf-models-nightly`,\n",
"which is the nightly Model Garden package created automatically each day.\n",
"* `pip` will install all models and dependencies automatically."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hYEwGTeCXnnX"
},
"source": [
"<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://www.tensorflow.org/official_models/tutorials/decoding_api_in_tf_nlp.ipynb\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
"  </td>\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
"  </td>\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
"  </td>\n",
"  <td>\n",
"    <a href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/decoding_api_in_tf_nlp.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n",
"  </td>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2j-xhrsVQOQT"
},
"outputs": [],
"source": [
"pip install tf-models-nightly"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BjP7zwxmskpY"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import tensorflow as tf\n",
"\n",
"from official import nlp\n",
"from official.nlp.modeling.ops import sampling_module\n",
"from official.nlp.modeling.ops import beam_search"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0AWgyo-IQ5sP"
},
"source": [
"# Decoding API\n",
"This API provides an interface to experiment with different decoding strategies used for auto-regressive models.\n",
"\n",
"1. The following sampling strategies are provided in sampling_module.py, which inherits from the base Decoding class:\n",
"    * [top_p](https://arxiv.org/abs/1904.09751) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L65)\n",
"\n",
"      This implementation chooses the most probable tokens whose cumulative probability is at most top_p.\n",
"\n",
"    * [top_k](https://arxiv.org/pdf/1805.04833.pdf) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L48)\n",
"\n",
"      At each timestep, this implementation samples from the top-k logits based on their probability distribution.\n",
"\n",
"    * Greedy : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L26)\n",
"\n",
"      This implementation selects the most probable token at each step.\n",
"\n",
"2. Beam search is provided in beam_search.py. [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search.py)\n",
"\n",
"    This implementation reduces the risk of missing hidden high-probability tokens by keeping the most likely num_beams hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MfOj7oaBRQnS"
},
"source": [
"## Initialize Sampling Module in TF-NLP\n",
"\n",
"\n",
"> **symbols_to_logits_fn** : This is a closure implemented by the user of the API. Its inputs and outputs are:\n",
"```\n",
"Args:\n",
"  1] ids [batch_size, .. (index + 1 or 1 if padded_decode is True)],\n",
"  2] index [scalar] : current decoded step,\n",
"  3] cache [nested dictionary of tensors].\n",
"Returns:\n",
"  1] tensor for next-step logits [batch_size, vocab]\n",
"  2] the updated_cache [nested dictionary of tensors].\n",
"```\n",
"This closure calls the model to predict the logits for the 'index+1' step. The cache is used for faster decoding.\n",
"Here is a [reference](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search_test.py#L88) implementation of the above closure.\n",
"\n",
"\n",
"> **length_normalization_fn** : Closure that returns the length normalization factor.\n",
"```\n",
"Args:\n",
"  1] length : scalar for decoded step index.\n",
"  2] dtype : data-type of output tensor.\n",
"Returns:\n",
"  1] value of length normalization factor.\n",
"Example :\n",
"  def _length_norm(length, dtype):\n",
"    return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)\n",
"```\n",
"\n",
"> **vocab_size** : Output vocabulary size.\n",
"\n",
"> **max_decode_length** : Scalar for the total number of decoding steps.\n",
"\n",
"> **eos_id** : Decoding will stop if all output decoded ids in the batch have this ID.\n",
"\n",
"> **padded_decode** : Set this to True if running on TPU. Tensors are padded to max_decode_length if this is True.\n",
"\n",
"> **top_k** : top_k is enabled if this value is > 1.\n",
"\n",
"> **top_p** : top_p is enabled if this value is > 0 and < 1.0.\n",
"\n",
"> **sampling_temperature** : This is used to re-scale the softmax output. Temperature skews the distribution towards high-probability tokens and lowers the mass in the tail of the distribution. The value has to be positive. A low temperature approaches greedy decoding and makes the distribution sharper, while a high temperature makes it flatter.\n",
"\n",
"> **enable_greedy** : By default, this is True and greedy decoding is enabled.\n"
]
},
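{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before wiring these arguments into a sampler, it helps to see what `sampling_temperature` does on its own. The next cell is a minimal sketch in plain TensorFlow (not part of the decoding API): it rescales a fixed set of logits by several temperatures and prints the resulting softmax distributions, showing how a low temperature sharpens the distribution and a high temperature flattens it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch: how temperature re-scales a softmax distribution.\n",
"example_logits = tf.math.log(tf.constant([0.5, 0.3, 0.2]))\n",
"for temperature in [0.5, 1.0, 2.0]:\n",
"  print(temperature, tf.nn.softmax(example_logits / temperature).numpy())"
]
},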
{
"cell_type": "markdown",
"metadata": {
"id": "lV1RRp6ihnGX"
},
"source": [
"# Initialize the Model Hyper-parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eTsGp2gaKLdE"
},
"outputs": [],
"source": [
"params = {}\n",
"params['num_heads'] = 2\n",
"params['num_layers'] = 2\n",
"params['batch_size'] = 2\n",
"params['n_dims'] = 256\n",
"params['max_decode_length'] = 4"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UGvmd0_dRFYI"
},
"source": [
"## What is a Cache?\n",
"In auto-regressive architectures like Transformer-based [Encoder-Decoder](https://arxiv.org/abs/1706.03762) models,\n",
"a cache is used for fast sequential decoding.\n",
"It is a nested dictionary storing pre-computed hidden states (the keys and values in the self-attention and cross-attention blocks) for every layer, for example:\n",
"\n",
"```\n",
"cache = {\n",
"    'layer_%d' % layer: {\n",
"        'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims'] // params['num_heads']], dtype=tf.float32),\n",
"        'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims'] // params['num_heads']], dtype=tf.float32)\n",
"    } for layer in range(params['num_layers'])\n",
"}\n",
"# ...plus any model-specific entries, e.g. cache['model_specific_item'].\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CYXkoplAij01"
},
"source": [
"# Initialize the cache."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D6kfZOOKgkm1"
},
"outputs": [],
"source": [
"cache = {\n",
"    'layer_%d' % layer: {\n",
"        'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims'] // params['num_heads']], dtype=tf.float32),\n",
"        'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims'] // params['num_heads']], dtype=tf.float32)\n",
"    } for layer in range(params['num_layers'])\n",
"}\n",
"print(\"cache key shape for layer 1 :\", cache['layer_1']['k'].shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nNY3Xn8SiblP"
},
"source": [
"# Define a closure for length normalization (optional).\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "T92ccAzlnGqh"
},
"outputs": [],
"source": [
"def length_norm(length, dtype):\n",
"  \"\"\"Return length normalization factor.\"\"\"\n",
"  # An exponent of 0.0 makes the factor 1.0, i.e. no length penalty;\n",
"  # a positive exponent (e.g. 0.6) would favor longer sequences.\n",
"  return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syl7I5nURPgW"
},
"source": [
"# Create model_fn\n",
" In practice, this will be replaced by an actual model implementation such as [this one](https://github.com/tensorflow/models/blob/master/official/nlp/transformer/transformer.py#L236).\n",
"```\n",
"Args:\n",
"  i : Step that is being decoded.\n",
"Returns:\n",
"  logit probabilities of size [batch_size, vocab_size]\n",
"```\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AhzSkRisRdB6"
},
"outputs": [],
"source": [
"probabilities = tf.constant([[[0.3, 0.4, 0.3], [0.3, 0.3, 0.4],\n",
" [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],\n",
" [[0.2, 0.5, 0.3], [0.2, 0.7, 0.1],\n",
" [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]])\n",
"def model_fn(i):\n",
" return probabilities[:, i, :]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "DBMUkaVmVZBg"
},
"source": [
"# Initialize symbols_to_logits_fn\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FAJ4CpbfVdjr"
},
"outputs": [],
"source": [
"def _symbols_to_logits_fn():\n",
"  \"\"\"Calculates logits of the next tokens.\"\"\"\n",
"  def symbols_to_logits_fn(ids, i, temp_cache):\n",
"    del ids  # The dummy model_fn above only needs the step index.\n",
"    logits = tf.cast(tf.math.log(model_fn(i)), tf.float32)\n",
"    return logits, temp_cache\n",
"  return symbols_to_logits_fn"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "R_tV3jyWVL47"
},
"source": [
"# Greedy\n",
"Greedy decoding selects the token id with the highest probability as its next id: $id_t = argmax_{id}P(id | id_{1:t-1})$ at each timestep $t$. The code cell below demonstrates greedy decoding."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aGt9idSkVQEJ"
},
"outputs": [],
"source": [
"greedy_obj = sampling_module.SamplingModule(\n",
" length_normalization_fn=None,\n",
" dtype=tf.float32,\n",
" symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
" vocab_size=3,\n",
" max_decode_length=params['max_decode_length'],\n",
" eos_id=10,\n",
" padded_decode=False)\n",
"ids, _ = greedy_obj.generate(\n",
" initial_ids=tf.constant([9, 1]), initial_cache=cache)\n",
"print(\"Greedy Decoded Ids:\", ids)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "s4pTTsQXVz5O"
},
"source": [
"# top_k sampling\n",
"In *Top-K* sampling, the *K* most likely next token ids are filtered and the probability mass is redistributed among only those *K* ids. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pCLWIn6GV5_G"
},
"outputs": [],
"source": [
"top_k_obj = sampling_module.SamplingModule(\n",
" length_normalization_fn=length_norm,\n",
" dtype=tf.float32,\n",
" symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
" vocab_size=3,\n",
" max_decode_length=params['max_decode_length'],\n",
" eos_id=10,\n",
" sample_temperature=tf.constant(1.0),\n",
" top_k=tf.constant(3),\n",
" padded_decode=False,\n",
" enable_greedy=False)\n",
"ids, _ = top_k_obj.generate(\n",
" initial_ids=tf.constant([9, 1]), initial_cache=cache)\n",
"print(\"top-k sampled Ids:\", ids)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Jp3G-eE_WI4Y"
},
"source": [
"# top_p sampling\n",
"Instead of sampling only from the most likely *K* token ids, *Top-p* sampling chooses from the smallest possible set of ids whose cumulative probability exceeds the probability *p*."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rEGdIWcuWILO"
},
"outputs": [],
"source": [
"top_p_obj = sampling_module.SamplingModule(\n",
" length_normalization_fn=length_norm,\n",
" dtype=tf.float32,\n",
" symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
" vocab_size=3,\n",
" max_decode_length=params['max_decode_length'],\n",
" eos_id=10,\n",
" sample_temperature=tf.constant(1.0),\n",
" top_p=tf.constant(0.9),\n",
" padded_decode=False,\n",
" enable_greedy=False)\n",
"ids, _ = top_p_obj.generate(\n",
" initial_ids=tf.constant([9, 1]), initial_cache=cache)\n",
"print(\"top-p sampled Ids:\", ids)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2hcuyJ2VWjDz"
},
"source": [
"# Beam search decoding\n",
"Beam search reduces the risk of missing hidden high probability token ids by keeping the most likely num_beams of hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability. "
]
},
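{
"cell_type": "markdown",
"metadata": {},
"source": [
"A note on the `alpha` argument used below: assuming the Model Garden implementation follows the GNMT-style penalty (the same form as the `length_norm` closure above), each hypothesis score is normalized by ((5 + length) / 6)^alpha, so `alpha=0.6` mildly favors longer sequences and `alpha=0.0` disables length normalization."
]
},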
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cJ3WzvSrWmSA"
},
"outputs": [],
"source": [
"beam_size = 2\n",
"params['batch_size'] = 1\n",
"beam_cache = {\n",
" 'layer_%d' % layer: {\n",
" 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32),\n",
" 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32)\n",
" } for layer in range(params['num_layers'])\n",
" }\n",
"print(\"cache key shape for layer 1 :\", beam_cache['layer_1']['k'].shape)\n",
"ids, _ = beam_search.sequence_beam_search(\n",
" symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
" initial_ids=tf.constant([9], tf.int32),\n",
" initial_cache=beam_cache,\n",
" vocab_size=3,\n",
" beam_size=beam_size,\n",
" alpha=0.6,\n",
" max_decode_length=params['max_decode_length'],\n",
" eos_id=10,\n",
" padded_decode=False,\n",
" dtype=tf.float32)\n",
"print(\"Beam search ids:\", ids)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "decoding_api_in_tf_nlp.ipynb",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Customizing a Transformer Encoder",
"private_outputs": true,
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Bp8t2AI8i7uP"
},
"source": [
"##### Copyright 2020 The TensorFlow Authors."
]
},
{
"cell_type": "code",
"metadata": {
"cellView": "form",
"id": "rxPj2Lsni9O4"
},
"source": [
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "6xS-9i5DrRvO"
},
"source": [
"# Customizing a Transformer Encoder"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Mwb9uw1cDXsa"
},
"source": [
"<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
" <td>\n",
" <a target=\"_blank\" href=\"https://www.tensorflow.org/official_models/nlp/customize_encoder\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
" </td>\n",
" <td>\n",
" <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
" </td>\n",
" <td>\n",
" <a target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
" </td>\n",
" <td>\n",
" <a href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/nlp/customize_encoder.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n",
" </td>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "iLrcV4IyrcGX"
},
"source": [
"## Learning objectives\n",
"\n",
"The [TensorFlow Models NLP library](https://github.com/tensorflow/models/tree/master/official/nlp/modeling) is a collection of tools for building and training modern high-performance natural language models.\n",
"\n",
"The [TransformerEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py) is the core of this library, and many new network architectures have been proposed to improve the encoder. In this Colab notebook, we will learn how to customize the encoder to employ new network architectures."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YYxdyoWgsl8t"
},
"source": [
"## Install and import"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fEJSFutUsn_h"
},
"source": [
"### Install the TensorFlow Model Garden pip package\n",
"\n",
"* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes from the `tensorflow_models` GitHub repo. To include the latest changes, you may install `tf-models-nightly`,\n",
"which is the nightly Model Garden package created automatically each day.\n",
"* `pip` will install all models and dependencies automatically."
]
},
{
"cell_type": "code",
"metadata": {
"id": "thsKZDjhswhR"
},
"source": [
"!pip install -q tf-models-official==2.4.0"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "hpf7JPCVsqtv"
},
"source": [
"### Import Tensorflow and other libraries"
]
},
{
"cell_type": "code",
"metadata": {
"id": "my4dp-RMssQe"
},
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"\n",
"from official.modeling import activations\n",
"from official.nlp import modeling\n",
"from official.nlp.modeling import layers, losses, models, networks"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "vjDmVsFfs85n"
},
"source": [
"## Canonical BERT encoder\n",
"\n",
"Before learning how to customize the encoder, let's first create a canonical BERT encoder and use it to instantiate a `BertClassifier` for a classification task."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Oav8sbgstWc-"
},
"source": [
"cfg = {\n",
" \"vocab_size\": 100,\n",
" \"hidden_size\": 32,\n",
" \"num_layers\": 3,\n",
" \"num_attention_heads\": 4,\n",
" \"intermediate_size\": 64,\n",
" \"activation\": activations.gelu,\n",
" \"dropout_rate\": 0.1,\n",
" \"attention_dropout_rate\": 0.1,\n",
" \"max_sequence_length\": 16,\n",
" \"type_vocab_size\": 2,\n",
" \"initializer\": tf.keras.initializers.TruncatedNormal(stddev=0.02),\n",
"}\n",
"bert_encoder = modeling.networks.BertEncoder(**cfg)\n",
"\n",
"def build_classifier(bert_encoder):\n",
" return modeling.models.BertClassifier(bert_encoder, num_classes=2)\n",
"\n",
"canonical_classifier_model = build_classifier(bert_encoder)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Qe2UWI6_tsHo"
},
"source": [
"`canonical_classifier_model` can be trained using the training data. For details about how to train the model, please see the colab [fine_tuning_bert.ipynb](https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb). We skip the code that trains the model here.\n",
"\n",
"After training, we can apply the model to do prediction.\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "csED2d-Yt5h6"
},
"source": [
"def predict(model):\n",
" batch_size = 3\n",
" np.random.seed(0)\n",
" word_ids = np.random.randint(\n",
" cfg[\"vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n",
" mask = np.random.randint(2, size=(batch_size, cfg[\"max_sequence_length\"]))\n",
" type_ids = np.random.randint(\n",
" cfg[\"type_vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n",
" print(model([word_ids, mask, type_ids], training=False))\n",
"\n",
"predict(canonical_classifier_model)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "PzKStEK9t_Pb"
},
"source": [
"## Customize BERT encoder\n",
"\n",
"A BERT encoder consists of an embedding network and multiple transformer blocks, and each transformer block contains an attention layer and a feedforward layer."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rmwQfhj6fmKz"
},
"source": [
"We provide easy ways to customize each of those components via (1)\n",
"[EncoderScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py) and (2) [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py)."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xsMgEVHAui11"
},
"source": [
"### Use EncoderScaffold\n",
"\n",
"`EncoderScaffold` allows users to provide a custom embedding subnetwork\n",
" (which will replace the standard embedding logic) and/or a custom hidden layer class (which will replace the `Transformer` instantiation in the encoder)."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-JBabpa2AOz8"
},
"source": [
"#### Without Customization\n",
"\n",
"Without any customization, `EncoderScaffold` behaves the same as the canonical `BertEncoder`.\n",
"\n",
"As shown in the following example, `EncoderScaffold` can load `BertEncoder`'s weights and output the same values:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ktNzKuVByZQf"
},
"source": [
"default_hidden_cfg = dict(\n",
" num_attention_heads=cfg[\"num_attention_heads\"],\n",
" intermediate_size=cfg[\"intermediate_size\"],\n",
" intermediate_activation=activations.gelu,\n",
" dropout_rate=cfg[\"dropout_rate\"],\n",
" attention_dropout_rate=cfg[\"attention_dropout_rate\"],\n",
" kernel_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n",
")\n",
"default_embedding_cfg = dict(\n",
" vocab_size=cfg[\"vocab_size\"],\n",
" type_vocab_size=cfg[\"type_vocab_size\"],\n",
" hidden_size=cfg[\"hidden_size\"],\n",
" initializer=tf.keras.initializers.TruncatedNormal(0.02),\n",
" dropout_rate=cfg[\"dropout_rate\"],\n",
" max_seq_length=cfg[\"max_sequence_length\"]\n",
")\n",
"default_kwargs = dict(\n",
" hidden_cfg=default_hidden_cfg,\n",
" embedding_cfg=default_embedding_cfg,\n",
" num_hidden_instances=cfg[\"num_layers\"],\n",
" pooled_output_dim=cfg[\"hidden_size\"],\n",
" return_all_layer_outputs=True,\n",
" pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n",
")\n",
"\n",
"encoder_scaffold = modeling.networks.EncoderScaffold(**default_kwargs)\n",
"classifier_model_from_encoder_scaffold = build_classifier(encoder_scaffold)\n",
"classifier_model_from_encoder_scaffold.set_weights(\n",
" canonical_classifier_model.get_weights())\n",
"predict(classifier_model_from_encoder_scaffold)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "sMaUmLyIuwcs"
},
"source": [
"#### Customize Embedding\n",
"\n",
"Next, we show how to use a customized embedding network.\n",
"\n",
"We first build an embedding network that will replace the default network. This one will have two inputs (`mask` and `word_ids`) instead of three, and won't use positional embeddings."
]
},
{
"cell_type": "code",
"metadata": {
"id": "LTinnaG6vcsw"
},
"source": [
"word_ids = tf.keras.layers.Input(\n",
" shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_word_ids\")\n",
"mask = tf.keras.layers.Input(\n",
" shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_mask\")\n",
"embedding_layer = modeling.layers.OnDeviceEmbedding(\n",
" vocab_size=cfg['vocab_size'],\n",
" embedding_width=cfg['hidden_size'],\n",
" initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),\n",
" name=\"word_embeddings\")\n",
"word_embeddings = embedding_layer(word_ids)\n",
"attention_mask = layers.SelfAttentionMask()([word_embeddings, mask])\n",
"new_embedding_network = tf.keras.Model([word_ids, mask],\n",
" [word_embeddings, attention_mask])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "HN7_yu-6O3qI"
},
"source": [
"Inspecting `new_embedding_network`, we can see it takes two inputs:\n",
"`input_word_ids` and `input_mask`."
]
},
{
"cell_type": "code",
"metadata": {
"id": "fO9zKFE4OpHp"
},
"source": [
"tf.keras.utils.plot_model(new_embedding_network, show_shapes=True, dpi=48)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "9cOaGQHLv12W"
},
"source": [
"We can then build a new encoder using the above `new_embedding_network`."
]
},
{
"cell_type": "code",
"metadata": {
"id": "mtFDMNf2vIl9"
},
"source": [
"kwargs = dict(default_kwargs)\n",
"\n",
"# Use new embedding network.\n",
"kwargs['embedding_cls'] = new_embedding_network\n",
"kwargs['embedding_data'] = embedding_layer.embeddings\n",
"\n",
"encoder_with_customized_embedding = modeling.networks.EncoderScaffold(**kwargs)\n",
"classifier_model = build_classifier(encoder_with_customized_embedding)\n",
"# ... Train the model ...\n",
"print(classifier_model.inputs)\n",
"\n",
"# Assert that there are only two inputs.\n",
"assert len(classifier_model.inputs) == 2"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Z73ZQDtmwg9K"
},
"source": [
"#### Customize Transformer\n",
"\n",
"Users can also override the [hidden_cls](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py#L103) argument in `EncoderScaffold`'s constructor to employ a customized Transformer layer.\n",
"\n",
"See [ReZeroTransformer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/rezero_transformer.py) for how to implement a customized Transformer layer.\n",
"\n",
"Following is an example of using `ReZeroTransformer`:\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "uAIarLZgw6pA"
},
"source": [
"kwargs = dict(default_kwargs)\n",
"\n",
"# Use ReZeroTransformer.\n",
"kwargs['hidden_cls'] = modeling.layers.ReZeroTransformer\n",
"\n",
"encoder_with_rezero_transformer = modeling.networks.EncoderScaffold(**kwargs)\n",
"classifier_model = build_classifier(encoder_with_rezero_transformer)\n",
"# ... Train the model ...\n",
"predict(classifier_model)\n",
"\n",
"# Assert that the variable `rezero_alpha` from ReZeroTransformer exists.\n",
"assert 'rezero_alpha' in ''.join([x.name for x in classifier_model.trainable_weights])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "6PMHFdvnxvR0"
},
"source": [
"### Use [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py)\n",
"\n",
"The above method of customizing `Transformer` requires rewriting the whole `Transformer` layer, while sometimes you may only want to customize either the attention layer or the feedforward block. In this case, [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py) can be used.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "D6FejlgwyAy_"
},
"source": [
"#### Customize Attention Layer\n",
"\n",
"Users can also override the [attention_cls](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py#L45) argument in `TransformerScaffold`'s constructor to employ a customized Attention layer.\n",
"\n",
"See [TalkingHeadsAttention](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/talking_heads_attention.py) for how to implement a customized `Attention` layer.\n",
"\n",
"Following is an example of using [TalkingHeadsAttention](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/talking_heads_attention.py):"
]
},
{
"cell_type": "code",
"metadata": {
"id": "nFrSMrZuyNeQ"
},
"source": [
"# Use TalkingHeadsAttention\n",
"hidden_cfg = dict(default_hidden_cfg)\n",
"hidden_cfg['attention_cls'] = modeling.layers.TalkingHeadsAttention\n",
"\n",
"kwargs = dict(default_kwargs)\n",
"kwargs['hidden_cls'] = modeling.layers.TransformerScaffold\n",
"kwargs['hidden_cfg'] = hidden_cfg\n",
"\n",
"encoder = modeling.networks.EncoderScaffold(**kwargs)\n",
"classifier_model = build_classifier(encoder)\n",
"# ... Train the model ...\n",
"predict(classifier_model)\n",
"\n",
"# Assert that the variable `pre_softmax_weight` from TalkingHeadsAttention exists.\n",
"assert 'pre_softmax_weight' in ''.join([x.name for x in classifier_model.trainable_weights])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "kuEJcTyByVvI"
},
"source": [
"#### Customize Feedforward Layer\n",
"\n",
"Similarly, one can also customize the feedforward layer.\n",
"\n",
"See [GatedFeedforward](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/gated_feedforward.py) for how to implement a customized feedforward layer.\n",
"\n",
"Following is an example of using [GatedFeedforward](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/gated_feedforward.py)."
]
},
{
"cell_type": "code",
"metadata": {
"id": "XAbKy_l4y_-i"
},
"source": [
"# Use GatedFeedforward\n",
"hidden_cfg = dict(default_hidden_cfg)\n",
"hidden_cfg['feedforward_cls'] = modeling.layers.GatedFeedforward\n",
"\n",
"kwargs = dict(default_kwargs)\n",
"kwargs['hidden_cls'] = modeling.layers.TransformerScaffold\n",
"kwargs['hidden_cfg'] = hidden_cfg\n",
"\n",
"encoder_with_gated_feedforward = modeling.networks.EncoderScaffold(**kwargs)\n",
"classifier_model = build_classifier(encoder_with_gated_feedforward)\n",
"# ... Train the model ...\n",
"predict(classifier_model)\n",
"\n",
"# Assert that the variable `gate` from GatedFeedforward exists.\n",
"assert 'gate' in ''.join([x.name for x in classifier_model.trainable_weights])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "a_8NWUhkzeAq"
},
"source": [
"### Build a new Encoder using building blocks from KerasBERT.\n",
"\n",
"Finally, you could also build a new encoder using building blocks in the modeling library.\n",
"\n",
"See [AlbertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/albert_encoder.py) as an example:\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "xsiA3RzUzmUM"
},
"source": [
"albert_encoder = modeling.networks.AlbertEncoder(**cfg)\n",
"classifier_model = build_classifier(albert_encoder)\n",
"# ... Train the model ...\n",
"predict(classifier_model)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "MeidDfhlHKSO"
},
"source": [
"Inspecting the `albert_encoder`, we see it stacks the same `Transformer` layer multiple times."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Uv_juT22HERW"
},
"source": [
"tf.keras.utils.plot_model(albert_encoder, show_shapes=True, dpi=48)"
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "80xnUmoI7fBX"
},
"source": [
"##### Copyright 2020 The TensorFlow Authors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "8nvTnfs6Q692"
},
"outputs": [],
"source": [
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "WmfcMK5P5C1G"
},
"source": [
"# Introduction to the TensorFlow Models NLP library"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cH-oJ8R6AHMK"
},
"source": [
"<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://www.tensorflow.org/official_models/nlp/nlp_modeling_library_intro\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
"  </td>\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
"  </td>\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
"  </td>\n",
"  <td>\n",
"    <a href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/nlp/nlp_modeling_library_intro.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n",
"  </td>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0H_EFIhq4-MJ"
},
"source": [
"## Learning objectives\n",
"\n",
"In this Colab notebook, you will learn how to build transformer-based models for common NLP tasks including pretraining, span labeling, and classification, using the building blocks from the [NLP modeling library](https://github.com/tensorflow/models/tree/master/official/nlp/modeling)."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2N97-dps_nUk"
},
"source": [
"## Install and import"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "459ygAVl_rg0"
},
"source": [
"### Install the TensorFlow Model Garden pip package\n",
"\n",
"* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes from the `tensorflow_models` GitHub repo. To include the latest changes, you may install `tf-models-nightly`,\n",
"which is the nightly Model Garden package created automatically each day.\n",
"* `pip` will install all models and dependencies automatically."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Y-qGkdh6_sZc"
},
"outputs": [],
"source": [
"!pip install -q tf-models-official==2.4.0"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "e4huSSwyAG_5"
},
"source": [
"### Import Tensorflow and other libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jqYXqtjBAJd9"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"\n",
"from official.nlp import modeling\n",
"from official.nlp.modeling import layers, losses, models, networks"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "djBQWjvy-60Y"
},
"source": [
"## BERT pretraining model\n",
"\n",
"BERT ([Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)) introduced the method of pre-training language representations on a large text corpus and then using that model for downstream NLP tasks.\n",
"\n",
"In this section, we will learn how to build a model to pretrain BERT on the masked language modeling task and the next sentence prediction task. For simplicity, we only show a minimal example and use dummy data."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MKuHVlsCHmiq"
},
"source": [
"### Build a `BertPretrainer` model wrapping `BertEncoder`\n",
"\n",
"The [BertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/bert_encoder.py) implements the Transformer-based encoder as described in [BERT paper](https://arxiv.org/abs/1810.04805). It includes the embedding lookups and transformer layers, but not the masked language model or classification task networks.\n",
"\n",
"The [BertPretrainer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_pretrainer.py) allows a user to pass in a transformer stack, and instantiates the masked language model and classification networks that are used to create the training objectives."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "EXkcXz-9BwB3"
},
"outputs": [],
"source": [
"# Build a small transformer network.\n",
"vocab_size = 100\n",
"sequence_length = 16\n",
"network = modeling.networks.BertEncoder(\n",
"    vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0NH5irV5KTMS"
},
"source": [
"Inspecting the encoder, we see it contains a few embedding layers and stacked `Transformer` layers, connected to three input layers:\n",
"\n",
"`input_word_ids`, `input_type_ids` and `input_mask`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lZNoZkBrIoff"
},
"outputs": [],
"source": [
"tf.keras.utils.plot_model(network, show_shapes=True, dpi=48)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "o7eFOZXiIl-b"
},
"outputs": [],
"source": [
"# Create a BERT pretrainer with the created network.\n",
"num_token_predictions = 8\n",
"bert_pretrainer = modeling.models.BertPretrainer(\n",
" network, num_classes=2, num_token_predictions=num_token_predictions, output='predictions')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "d5h5HT7gNHx_"
},
"source": [
"Inspecting the `bert_pretrainer`, we see it wraps the `encoder` with additional `MaskedLM` and `Classification` heads."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2tcNfm03IBF7"
},
"outputs": [],
"source": [
"tf.keras.utils.plot_model(bert_pretrainer, show_shapes=True, dpi=48)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "F2oHrXGUIS0M"
},
"outputs": [],
"source": [
"# We can feed some dummy data to get masked language model and sentence output.\n",
"batch_size = 2\n",
"word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n",
"mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
"type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
"masked_lm_positions_data = np.random.randint(2, size=(batch_size, num_token_predictions))\n",
"\n",
"outputs = bert_pretrainer(\n",
" [word_id_data, mask_data, type_id_data, masked_lm_positions_data])\n",
"lm_output = outputs[\"masked_lm\"]\n",
"sentence_output = outputs[\"classification\"]\n",
"print(lm_output)\n",
"print(sentence_output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bnx3UCHniCS5"
},
"source": [
"### Compute loss\n",
"Next, we can use `lm_output` and `sentence_output` to compute `loss`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "k30H4Q86f52x"
},
"outputs": [],
"source": [
"masked_lm_ids_data = np.random.randint(vocab_size, size=(batch_size, num_token_predictions))\n",
"masked_lm_weights_data = np.random.randint(2, size=(batch_size, num_token_predictions))\n",
"next_sentence_labels_data = np.random.randint(2, size=(batch_size))\n",
"\n",
"mlm_loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n",
" labels=masked_lm_ids_data,\n",
" predictions=lm_output,\n",
" weights=masked_lm_weights_data)\n",
"sentence_loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n",
" labels=next_sentence_labels_data,\n",
" predictions=sentence_output)\n",
"loss = mlm_loss + sentence_loss\n",
"print(loss)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wrmSs8GjHxVw"
},
"source": [
"With the loss, you can optimize the model.\n",
"After training, we can save the weights of TransformerEncoder for the downstream fine-tuning tasks. Please see [run_pretraining.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_pretraining.py) for the full example.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k8cQVFvBCV4s"
},
"source": [
"## Span labeling model\n",
"\n",
"Span labeling is the task of assigning labels to a span of text, for example, labeling a span of text as the answer to a given question.\n",
"\n",
"In this section, we will learn how to build a span labeling model. Again, we use dummy data for simplicity."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xrLLEWpfknUW"
},
"source": [
"### Build a BertSpanLabeler wrapping BertEncoder\n",
"\n",
"[BertSpanLabeler](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_span_labeler.py) implements a simple single-span start-end predictor (that is, a model that predicts two values: a start token index and an end token index), suitable for SQuAD-style tasks.\n",
"\n",
"Note that `BertSpanLabeler` wraps a `BertEncoder`, the weights of which can be restored from the above pretraining model.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "B941M4iUCejO"
},
"outputs": [],
"source": [
"network = modeling.networks.BertEncoder(\n",
" vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n",
"\n",
"# Create a BERT trainer with the created network.\n",
"bert_span_labeler = modeling.models.BertSpanLabeler(network)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QpB9pgj4PpMg"
},
"source": [
"Inspecting the `bert_span_labeler`, we see it wraps the encoder with an additional `SpanLabeling` head that outputs `start_position` and `end_position`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RbqRNJCLJu4H"
},
"outputs": [],
"source": [
"tf.keras.utils.plot_model(bert_span_labeler, show_shapes=True, dpi=48)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fUf1vRxZJwio"
},
"outputs": [],
"source": [
"# Create a set of 2-dimensional data tensors to feed into the model.\n",
"word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n",
"mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
"type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
"\n",
"# Feed the data to the model.\n",
"start_logits, end_logits = bert_span_labeler([word_id_data, mask_data, type_id_data])\n",
"print(start_logits)\n",
"print(end_logits)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "WqhgQaN1lt-G"
},
"source": [
"### Compute loss\n",
"With `start_logits` and `end_logits`, we can compute loss:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "waqs6azNl3Nn"
},
"outputs": [],
"source": [
"start_positions = np.random.randint(sequence_length, size=(batch_size))\n",
"end_positions = np.random.randint(sequence_length, size=(batch_size))\n",
"\n",
"start_loss = tf.keras.losses.sparse_categorical_crossentropy(\n",
" start_positions, start_logits, from_logits=True)\n",
"end_loss = tf.keras.losses.sparse_categorical_crossentropy(\n",
" end_positions, end_logits, from_logits=True)\n",
"\n",
"total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2\n",
"print(total_loss)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Zdf03YtZmd_d"
},
"source": [
"With the `loss`, you can optimize the model. Please see [run_squad.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_squad.py) for the full example."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0A1XnGSTChg9"
},
"source": [
"## Classification model\n",
"\n",
"In the last section, we show how to build a text classification model.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MSK8OpZgnQa9"
},
"source": [
"### Build a BertClassifier model wrapping BertEncoder\n",
"\n",
"[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a [CLS] token classification model containing a single classification head."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cXXCsffkCphk"
},
"outputs": [],
"source": [
"network = modeling.networks.BertEncoder(\n",
" vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n",
"\n",
"# Create a BERT trainer with the created network.\n",
"num_classes = 2\n",
"bert_classifier = modeling.models.BertClassifier(\n",
" network, num_classes=num_classes)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8tZKueKYP4bB"
},
"source": [
"Inspecting the `bert_classifier`, we see it wraps the `encoder` with an additional `Classification` head."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "snlutm9ZJgEZ"
},
"outputs": [],
"source": [
"tf.keras.utils.plot_model(bert_classifier, show_shapes=True, dpi=48)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yyHPHsqBJkCz"
},
"outputs": [],
"source": [
"# Create a set of 2-dimensional data tensors to feed into the model.\n",
"word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n",
"mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
"type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
"\n",
"# Feed the data to the model.\n",
"logits = bert_classifier([word_id_data, mask_data, type_id_data])\n",
"print(logits)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "w--a2mg4nzKm"
},
"source": [
"### Compute loss\n",
"\n",
"With `logits`, we can compute `loss`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "9X0S1DoFn_5Q"
},
"outputs": [],
"source": [
"labels = np.random.randint(num_classes, size=(batch_size))\n",
"\n",
"loss = tf.keras.losses.sparse_categorical_crossentropy(\n",
" labels, logits, from_logits=True)\n",
"print(loss)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "mzBqOylZo3og"
},
"source": [
"With the `loss`, you can optimize the model. Please see [run_classifier.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py) or the colab [fine_tuning_bert.ipynb](https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb) for the full example."
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Introduction to the TensorFlow Models NLP library",
"private_outputs": true,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility library for picking an appropriate dataset function."""
from typing import Any, Callable, Union, Type

import tensorflow as tf

PossibleDatasetType = Union[Type[tf.data.Dataset], Callable[[tf.Tensor], Any]]


def pick_dataset_fn(file_type: str) -> PossibleDatasetType:
  """Returns a dataset constructor for the given file type."""
  if file_type == 'tfrecord':
    return tf.data.TFRecordDataset
  raise ValueError('Unrecognized file_type: {}'.format(file_type))
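# Example usage (a sketch; the shard path below is hypothetical):
#   dataset_fn = pick_dataset_fn('tfrecord')
#   dataset = dataset_fn(['/data/train-00000-of-00010.tfrecord'])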
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions for running models in a distributed setting."""

import json
import os

import tensorflow as tf


def _collective_communication(all_reduce_alg):
  """Return a CollectiveCommunication based on all_reduce_alg.

  Args:
    all_reduce_alg: a string specifying which collective communication to
      pick, or None.

  Returns:
    tf.distribute.experimental.CollectiveCommunication object

  Raises:
    ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"]
  """
  collective_communication_options = {
      None: tf.distribute.experimental.CollectiveCommunication.AUTO,
      "ring": tf.distribute.experimental.CollectiveCommunication.RING,
      "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL
  }
  if all_reduce_alg not in collective_communication_options:
    raise ValueError(
        "When used with `multi_worker_mirrored`, valid values for "
        "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format(
            all_reduce_alg))
  return collective_communication_options[all_reduce_alg]


def _mirrored_cross_device_ops(all_reduce_alg, num_packs):
  """Return a CrossDeviceOps based on all_reduce_alg and num_packs.

  Args:
    all_reduce_alg: a string specifying which cross device op to pick, or None.
    num_packs: an integer specifying number of packs for the cross device op.

  Returns:
    tf.distribute.CrossDeviceOps object or None.

  Raises:
    ValueError: if `all_reduce_alg` not in [None, "nccl", "hierarchical_copy"].
  """
  if all_reduce_alg is None:
    return None
  mirrored_all_reduce_options = {
      "nccl": tf.distribute.NcclAllReduce,
      "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce
  }
  if all_reduce_alg not in mirrored_all_reduce_options:
    raise ValueError(
        "When used with `mirrored`, valid values for all_reduce_alg are "
        "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format(
            all_reduce_alg))
  cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg]
  return cross_device_ops_class(num_packs=num_packs)


def tpu_initialize(tpu_address):
  """Initializes TPU for TF 2.x training.

  Args:
    tpu_address: string, bns address of master TPU worker.

  Returns:
    A TPUClusterResolver.
  """
  cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      tpu=tpu_address)
  if tpu_address not in ("", "local"):
    tf.config.experimental_connect_to_cluster(cluster_resolver)
  tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
  return cluster_resolver
def get_distribution_strategy(distribution_strategy="mirrored",
                              num_gpus=0,
                              all_reduce_alg=None,
                              num_packs=1,
                              tpu_address=None,
                              **kwargs):
  """Return a DistributionStrategy for running the model.

  Args:
    distribution_strategy: a string specifying which distribution strategy to
      use. Accepted values are "off", "one_device", "mirrored",
      "parameter_server", "multi_worker_mirrored", and "tpu" -- case
      insensitive. "tpu" means to use TPUStrategy using `tpu_address`.
      "off" means to use the default strategy which is obtained from
      tf.distribute.get_strategy (for details on the default strategy, see
      https://www.tensorflow.org/guide/distributed_training#default_strategy).
    num_gpus: Number of GPUs to run this model.
    all_reduce_alg: Optional. Specifies which algorithm to use when performing
      all-reduce. For `MirroredStrategy`, valid values are "nccl" and
      "hierarchical_copy". For `MultiWorkerMirroredStrategy`, valid values are
      "ring" and "nccl". If None, DistributionStrategy will choose based on
      device topology.
    num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
      or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
    tpu_address: Optional. String that represents TPU to connect to. Must not
      be None if `distribution_strategy` is set to `tpu`.
    **kwargs: Additional kwargs for internal usages.

  Returns:
    tf.distribute.DistributionStrategy object.

  Raises:
    ValueError: if `distribution_strategy` is "off" or "one_device" and
      `num_gpus` is larger than 1; or `num_gpus` is negative or if
      `distribution_strategy` is `tpu` but `tpu_address` is not specified.
  """
  del kwargs
  if num_gpus < 0:
    raise ValueError("`num_gpus` can not be negative.")

  if not isinstance(distribution_strategy, str):
    msg = ("distribution_strategy must be a string but got: %s." %
           (distribution_strategy,))
    if distribution_strategy == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
      msg += (" If you meant to pass the string 'off', make sure you add "
              "quotes around 'off' so that yaml interprets it as a string "
              "instead of a bool.")
    raise ValueError(msg)

  distribution_strategy = distribution_strategy.lower()
  if distribution_strategy == "off":
    if num_gpus > 1:
      raise ValueError(f"When {num_gpus} GPUs are specified, "
                       "distribution_strategy flag cannot be set to `off`.")
    # Return the default distribution strategy.
    return tf.distribute.get_strategy()

  if distribution_strategy == "tpu":
    # When tpu_address is an empty string, we communicate with local TPUs.
    cluster_resolver = tpu_initialize(tpu_address)
    return tf.distribute.TPUStrategy(cluster_resolver)

  if distribution_strategy == "multi_worker_mirrored":
    return tf.distribute.experimental.MultiWorkerMirroredStrategy(
        communication=_collective_communication(all_reduce_alg))

  if distribution_strategy == "one_device":
    if num_gpus == 0:
      return tf.distribute.OneDeviceStrategy("device:CPU:0")
    if num_gpus > 1:
      raise ValueError("`OneDeviceStrategy` can not be used for more than "
                       "one device.")
    return tf.distribute.OneDeviceStrategy("device:GPU:0")

  if distribution_strategy == "mirrored":
    if num_gpus == 0:
      devices = ["device:CPU:0"]
    else:
      devices = ["device:GPU:%d" % i for i in range(num_gpus)]
    return tf.distribute.MirroredStrategy(
        devices=devices,
        cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs))

  if distribution_strategy == "parameter_server":
    cluster_resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
    return tf.distribute.experimental.ParameterServerStrategy(cluster_resolver)

  raise ValueError("Unrecognized Distribution Strategy: %r" %
                   distribution_strategy)
def configure_cluster(worker_hosts=None, task_index=-1):
"""Set multi-worker cluster spec in TF_CONFIG environment variable.
Args:
worker_hosts: comma-separated list of worker ip:port pairs.
task_index: index of the worker.
Returns:
Number of workers in the cluster.
"""
tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
if tf_config:
num_workers = (
len(tf_config["cluster"].get("chief", [])) +
len(tf_config["cluster"].get("worker", [])))
elif worker_hosts:
workers = worker_hosts.split(",")
num_workers = len(workers)
if num_workers > 1 and task_index < 0:
raise ValueError("Must specify task_index when number of workers > 1")
task_index = 0 if num_workers == 1 else task_index
os.environ["TF_CONFIG"] = json.dumps({
"cluster": {
"worker": workers
},
"task": {
"type": "worker",
"index": task_index
}
})
else:
num_workers = 1
return num_workers
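# Editor's note: illustrative usage sketch, not part of the original module.
# The worker addresses are placeholders; in practice they come from flags.
def _example_configure_cluster():
  """Populates TF_CONFIG for a two-worker cluster, as worker 0."""
  num_workers = configure_cluster(
      worker_hosts="10.0.0.1:2222,10.0.0.2:2222", task_index=0)
  # TF_CONFIG now lists both workers, with this process as worker index 0,
  # which MultiWorkerMirroredStrategy reads at construction time.
  return num_workers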
def get_strategy_scope(strategy):
if strategy:
strategy_scope = strategy.scope()
else:
strategy_scope = DummyContextManager()
return strategy_scope
class DummyContextManager(object):
def __enter__(self):
pass
def __exit__(self, *args):
pass
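# Editor's note: illustrative usage sketch, not part of the original module.
# `get_strategy_scope` lets the same model-building code run with or without
# a strategy; passing None falls back to the no-op DummyContextManager.
def _example_get_strategy_scope(strategy=None):
  with get_strategy_scope(strategy):
    # Placed under the strategy scope only when a strategy was provided.
    model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
  return model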
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for distribution util functions."""
import tensorflow as tf
from official.common import distribute_utils
class DistributeUtilsTest(tf.test.TestCase):
"""Tests for distribute util functions."""
def test_invalid_args(self):
with self.assertRaisesRegex(ValueError, '`num_gpus` can not be negative.'):
_ = distribute_utils.get_distribution_strategy(num_gpus=-1)
with self.assertRaisesRegex(ValueError,
'.*If you meant to pass the string .*'):
_ = distribute_utils.get_distribution_strategy(
distribution_strategy=False, num_gpus=0)
with self.assertRaisesRegex(ValueError, 'When 2 GPUs are specified.*'):
_ = distribute_utils.get_distribution_strategy(
distribution_strategy='off', num_gpus=2)
with self.assertRaisesRegex(ValueError,
'`OneDeviceStrategy` can not be used.*'):
_ = distribute_utils.get_distribution_strategy(
distribution_strategy='one_device', num_gpus=2)
def test_one_device_strategy_cpu(self):
ds = distribute_utils.get_distribution_strategy('one_device', num_gpus=0)
self.assertEqual(ds.num_replicas_in_sync, 1)
self.assertEqual(len(ds.extended.worker_devices), 1)
self.assertIn('CPU', ds.extended.worker_devices[0])
def test_one_device_strategy_gpu(self):
ds = distribute_utils.get_distribution_strategy('one_device', num_gpus=1)
self.assertEqual(ds.num_replicas_in_sync, 1)
self.assertEqual(len(ds.extended.worker_devices), 1)
self.assertIn('GPU', ds.extended.worker_devices[0])
def test_mirrored_strategy(self):
ds = distribute_utils.get_distribution_strategy(num_gpus=5)
self.assertEqual(ds.num_replicas_in_sync, 5)
self.assertEqual(len(ds.extended.worker_devices), 5)
for device in ds.extended.worker_devices:
self.assertIn('GPU', device)
_ = distribute_utils.get_distribution_strategy(
distribution_strategy='mirrored',
num_gpus=2,
all_reduce_alg='nccl',
num_packs=2)
with self.assertRaisesRegex(
ValueError,
'When used with `mirrored`, valid values for all_reduce_alg are.*'):
_ = distribute_utils.get_distribution_strategy(
distribution_strategy='mirrored',
num_gpus=2,
all_reduce_alg='dummy',
num_packs=2)
def test_mwms(self):
distribute_utils.configure_cluster(worker_hosts=None, task_index=-1)
ds = distribute_utils.get_distribution_strategy(
'multi_worker_mirrored', all_reduce_alg='nccl')
self.assertIsInstance(
ds, tf.distribute.experimental.MultiWorkerMirroredStrategy)
def test_no_strategy(self):
ds = distribute_utils.get_distribution_strategy('off')
self.assertIs(ds, tf.distribute.get_strategy())
def test_invalid_strategy(self):
with self.assertRaisesRegex(
ValueError,
'distribution_strategy must be a string but got: False. If'):
distribute_utils.get_distribution_strategy(False)
with self.assertRaisesRegex(
ValueError, 'distribution_strategy must be a string but got: 1'):
distribute_utils.get_distribution_strategy(1)
def test_get_strategy_scope(self):
ds = distribute_utils.get_distribution_strategy('one_device', num_gpus=0)
with distribute_utils.get_strategy_scope(ds):
self.assertIs(tf.distribute.get_strategy(), ds)
with distribute_utils.get_strategy_scope(None):
self.assertIsNot(tf.distribute.get_strategy(), ds)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The central place to define flags."""
from absl import flags
def define_flags():
"""Defines flags.
All flags are defined as optional, but in practice most models use some of
these flags and so mark_flags_as_required() should be called after calling
this function. Typically, 'experiment', 'mode', and 'model_dir' are required.
For example:
```
from absl import flags
from official.common import flags as tfm_flags # pylint: disable=line-too-long
...
tfm_flags.define_flags()
flags.mark_flags_as_required(['experiment', 'mode', 'model_dir'])
```
The reason all flags are optional is because unit tests often do not set or
use any of the flags.
"""
flags.DEFINE_string(
'experiment', default=None, help=
'The experiment type registered, specifying an ExperimentConfig.')
flags.DEFINE_enum(
'mode',
default=None,
enum_values=[
'train', 'eval', 'train_and_eval', 'continuous_eval',
'continuous_train_and_eval', 'train_and_validate'
],
help='Mode to run: `train`, `eval`, `train_and_eval`, '
'`continuous_eval`, `continuous_train_and_eval` and '
'`train_and_validate` (which is not implemented in '
'the open source version).')
flags.DEFINE_string(
'model_dir',
default=None,
help='The directory where the model and training/evaluation summaries '
'are stored.')
flags.DEFINE_multi_string(
'config_file',
default=None,
help='YAML/JSON files which specify overrides. The override order '
'follows the order of args. Note that each file '
'can be used as an override template to override the default parameters '
'specified in Python. If the same parameter is specified in both '
'`--config_file` and `--params_override`, `config_file` will be used '
'first, followed by params_override.')
flags.DEFINE_string(
'params_override',
default=None,
help='A YAML/JSON string or a YAML file which specifies additional '
'overrides over the default parameters and those specified in '
'`--config_file`. Note that this is supposed to be used only to override '
'the model parameters, but not parameters like TPU-specific flags. '
'One canonical use case of `--config_file` and `--params_override` is '
'users first define a template config file using `--config_file`, then '
'use `--params_override` to adjust the minimal set of tuning parameters, '
'for example setting up different `train_batch_size`. The final override '
'order of parameters: default_model_params --> params from config_file '
'--> params in params_override. See also the help message of '
'`--config_file`.')
# Libraries that rely on gin often mistakenly define flags inside the
# library files, which causes conflicts.
try:
flags.DEFINE_multi_string(
'gin_file', default=None, help='List of paths to the config files.')
except flags.DuplicateFlagError:
pass
try:
flags.DEFINE_multi_string(
'gin_params',
default=None,
help='Newline separated list of Gin parameter bindings.')
except flags.DuplicateFlagError:
pass
flags.DEFINE_string(
'tpu',
default=None,
help='The Cloud TPU to use for training. This should be either the name '
'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 '
'url.')
flags.DEFINE_string(
'tf_data_service', default=None, help='The tf.data service address')
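# Editor's note: illustrative launcher sketch, not part of the original
# module; it mirrors the pattern from the define_flags() docstring above.
# `main` is a placeholder entry point.
#
#   from absl import app
#   from absl import flags
#   from official.common import flags as tfm_flags
#
#   def main(_):
#     print(flags.FLAGS.experiment, flags.FLAGS.mode, flags.FLAGS.model_dir)
#
#   if __name__ == '__main__':
#     tfm_flags.define_flags()
#     flags.mark_flags_as_required(['experiment', 'mode', 'model_dir'])
#     app.run(main)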
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration."""
# pylint: disable=unused-import
from official.nlp import tasks
from official.nlp.configs import experiment_configs
from official.utils.testing import mock_task
from official.vision import beta
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Global streamz counters."""
from tensorflow.python.eager import monitoring
progressive_policy_creation_counter = monitoring.Counter(
"/tensorflow/training/fast_training/progressive_policy_creation",
"Counter for the number of ProgressivePolicy creations.")
stack_vars_to_vars_call_counter = monitoring.Counter(
"/tensorflow/training/fast_training/tf_vars_to_vars",
"Counter for the number of low-level stacking API calls.")
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides TFM orbit actions and associated helper functions/classes."""
import os
from typing import List
from absl import logging
import gin
import orbit
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from official.core import base_trainer
from official.core import config_definitions
from official.modeling import optimization
class PruningActions:
"""Train action to updates pruning related information.
This action updates pruning steps at the end of trainig loop, and log
pruning metrics to tensorboard.
This action must be used when training a pruned model to avoid pruning error.
"""
def __init__(
self,
export_dir: str,
model: tf.keras.Model,
optimizer: tf.keras.optimizers.Optimizer,
):
"""Initializes the instance.
Args:
export_dir: `str` for the export directory of the pruning summaries.
model: `tf.keras.Model` model instance used for training. This will be
used to assign a pruning step to each prunable weight.
optimizer: `tf.keras.optimizers.Optimizer` optimizer instance used for
training. This will be used to find the current training steps.
"""
self._optimizer = optimizer
self.update_pruning_step = tfmot.sparsity.keras.UpdatePruningStep()
self.update_pruning_step.set_model(model)
self.update_pruning_step.on_train_begin()
self.pruning_summaries = tfmot.sparsity.keras.PruningSummaries(
log_dir=export_dir)
model.optimizer = optimizer
self.pruning_summaries.set_model(model)
def __call__(self, output: orbit.runner.Output):
"""Update pruning step and log pruning summaries.
Args:
output: The train output to test.
"""
self.update_pruning_step.on_epoch_end(batch=None)
self.pruning_summaries.on_epoch_begin(epoch=None)
class EMACheckpointing:
"""Eval action to save checkpoint with average weights when EMA is used.
This action swaps the weights of the model with the average weights, then it
saves the checkpoint under export_dir/ema_checkpoints. Checkpointing is
expensive for large models, so doing this action in eval is more efficient
than training.
"""
def __init__(self, export_dir: str, optimizer: tf.keras.optimizers.Optimizer,
checkpoint: tf.train.Checkpoint, max_to_keep: int = 1):
"""Initializes the instance.
Args:
export_dir: `str` for the export directory of the EMA average weights.
optimizer: `tf.keras.optimizers.Optimizer` optimizer instance used for
training. This will be used to swap the model weights with the average
weights.
checkpoint: `tf.train.Checkpoint` instance.
max_to_keep: `int` for max checkpoints to keep in ema_checkpoints subdir.
"""
if not isinstance(optimizer, optimization.ExponentialMovingAverage):
raise ValueError('Optimizer has to be an instance of '
'optimization.ExponentialMovingAverage for the '
'EMACheckpointing action.')
export_dir = os.path.join(export_dir, 'ema_checkpoints')
tf.io.gfile.makedirs(
os.path.dirname(export_dir))
self._optimizer = optimizer
self._checkpoint = checkpoint
self._checkpoint_manager = tf.train.CheckpointManager(
checkpoint,
directory=export_dir,
max_to_keep=max_to_keep,
checkpoint_name='average_weights')
def __call__(self, output: orbit.runner.Output):
"""Swaps model weights, and saves the checkpoint.
Args:
output: The train or eval output to test.
"""
self._optimizer.swap_weights()
self._checkpoint_manager.save(checkpoint_number=self._optimizer.iterations)
self._optimizer.swap_weights()
class RecoveryAction:
"""Train action to recover from loss blowup.
Checks the loss value by the given threshold. If applicable, recover the
model by reading the checkpoint on disk.
"""
def __init__(self, checkpoint_manager: tf.train.CheckpointManager):
self.checkpoint_manager = checkpoint_manager
def __call__(self, _):
"""Recovers the training by triggering checkpoint restoration."""
# Loads the previous good checkpoint.
checkpoint_path = self.checkpoint_manager.restore_or_initialize()
logging.warning('Recovering the model from checkpoint: %s.',
checkpoint_path)
class RecoveryCondition:
"""Recovery Condition."""
def __init__(self,
global_step: tf.Variable,
loss_upper_bound: float,
recovery_begin_steps: int = 0,
recovery_max_trials: int = 3):
self.recover_counter = 0
self.recovery_begin_steps = recovery_begin_steps
self.recovery_max_trials = recovery_max_trials
self.loss_upper_bound = loss_upper_bound
self.global_step = global_step
def __call__(self, outputs: orbit.runner.Output):
loss_value = outputs['training_loss']
if tf.math.is_nan(loss_value):
self.recover_counter += 1
if self.recover_counter > self.recovery_max_trials:
raise RuntimeError(
'The loss value is NaN after the training loop and this has happened '
'%d times.' % self.recover_counter)
return True
if (self.global_step >= self.recovery_begin_steps and
loss_value > self.loss_upper_bound):
self.recover_counter += 1
if self.recover_counter > self.recovery_max_trials:
raise RuntimeError(
f'The loss value is {loss_value}, which is larger than the bound '
f'{self.loss_upper_bound}; this has happened '
f'{self.recover_counter} times.')
return True
return False
@gin.configurable
def get_eval_actions(
params: config_definitions.ExperimentConfig,
trainer: base_trainer.Trainer,
model_dir: str) -> List[orbit.Action]:
"""Gets eval actions for TFM trainer."""
eval_actions = []
# Adds ema checkpointing action to save the average weights under
# ema_checkpoints subdir.
if isinstance(trainer.optimizer, optimization.ExponentialMovingAverage):
eval_actions.append(
EMACheckpointing(
export_dir=model_dir,
optimizer=trainer.optimizer,
checkpoint=trainer.checkpoint,
max_to_keep=params.trainer.max_to_keep))
return eval_actions
@gin.configurable
def get_train_actions(
params: config_definitions.ExperimentConfig, trainer: base_trainer.Trainer,
model_dir: str,
checkpoint_manager: tf.train.CheckpointManager) -> List[orbit.Action]:
"""Gets train actions for TFM trainer."""
train_actions = []
# Adds pruning callback actions.
if hasattr(params.task, 'pruning'):
train_actions.append(
PruningActions(
export_dir=model_dir,
model=trainer.model,
optimizer=trainer.optimizer))
if params.trainer.recovery_max_trials >= 0:
recovery_condition = RecoveryCondition(
global_step=trainer.global_step,
loss_upper_bound=params.trainer.loss_upper_bound,
recovery_begin_steps=params.trainer.recovery_begin_steps,
recovery_max_trials=params.trainer.recovery_max_trials,
)
recover_action = orbit.actions.ConditionalAction(
condition=recovery_condition,
action=RecoveryAction(checkpoint_manager),
)
train_actions.append(recover_action)
return train_actions
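# Editor's note: illustrative wiring sketch, not part of the original module.
# It assumes pre-built `params`, `trainer` and `checkpoint_manager` objects,
# and that the installed Orbit version accepts `train_actions`/`eval_actions`
# in `orbit.Controller`; `steps_per_loop=100` is a placeholder.
def _example_wire_actions(params, trainer, model_dir, checkpoint_manager):
  controller = orbit.Controller(
      trainer=trainer,
      evaluator=trainer,
      global_step=trainer.global_step,
      steps_per_loop=100,
      checkpoint_manager=checkpoint_manager,
      train_actions=get_train_actions(
          params, trainer, model_dir, checkpoint_manager=checkpoint_manager),
      eval_actions=get_eval_actions(params, trainer, model_dir))
  return controller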
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for TFM actions."""
import os
from absl.testing import parameterized
import numpy as np
import orbit
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.core import actions
from official.modeling import optimization
class TestModel(tf.Module):
def __init__(self):
self.value = tf.Variable(0)
@tf.function(input_signature=[])
def __call__(self):
return self.value
class ActionsTest(tf.test.TestCase, parameterized.TestCase):
@combinations.generate(
combinations.combine(
distribution=[
strategy_combinations.cloud_tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],))
def test_ema_checkpointing(self, distribution):
with distribution.scope():
directory = self.create_tempdir()
model = TestModel()
optimizer = tf.keras.optimizers.SGD()
optimizer = optimization.ExponentialMovingAverage(
optimizer, trainable_weights_only=False)
# Creates average weights for the model variables. Average weights are
# initialized to zero.
optimizer.shadow_copy(model)
checkpoint = tf.train.Checkpoint(model=model)
# Changes model.value to 3, average value is still 0.
model.value.assign(3)
# Checks model.value is 3
self.assertEqual(model(), 3)
ema_action = actions.EMACheckpointing(directory, optimizer, checkpoint)
ema_action({})
self.assertNotEmpty(
tf.io.gfile.glob(os.path.join(directory, 'ema_checkpoints')))
checkpoint.read(tf.train.latest_checkpoint(
os.path.join(directory, 'ema_checkpoints')))
# Checks model.value is 0 after swapping.
self.assertEqual(model(), 0)
@combinations.generate(
combinations.combine(
distribution=[
strategy_combinations.default_strategy,
strategy_combinations.cloud_tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],))
def test_recovery_condition(self, distribution):
with distribution.scope():
global_step = orbit.utils.create_global_step()
recover_condition = actions.RecoveryCondition(
global_step, loss_upper_bound=0.5, recovery_max_trials=2)
outputs = {'training_loss': 0.6}
self.assertTrue(recover_condition(outputs))
self.assertTrue(recover_condition(outputs))
with self.assertRaises(RuntimeError):
recover_condition(outputs)
global_step = orbit.utils.create_global_step()
recover_condition = actions.RecoveryCondition(
global_step, loss_upper_bound=0.5, recovery_max_trials=2)
outputs = {'training_loss': tf.constant([np.nan], tf.float32)}
self.assertTrue(recover_condition(outputs))
self.assertTrue(recover_condition(outputs))
with self.assertRaises(RuntimeError):
recover_condition(outputs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defines the base task abstraction."""
import abc
from typing import Optional
from absl import logging
import tensorflow as tf
from official.core import config_definitions
from official.modeling import optimization
from official.modeling import performance
OptimizationConfig = optimization.OptimizationConfig
RuntimeConfig = config_definitions.RuntimeConfig
class Task(tf.Module, metaclass=abc.ABCMeta):
"""A single-replica view of training procedure.
Tasks provide artifacts for training/validation procedures, including
loading/iterating over Datasets, training/validation steps, calculating the
loss and customized metrics with reduction.
"""
# Special keys in train/validate step returned logs.
loss = "loss"
def __init__(self,
params,
logging_dir: Optional[str] = None,
name: Optional[str] = None):
"""Task initialization.
Args:
params: the task configuration instance, which can be any of dataclass,
ConfigDict, namedtuple, etc.
logging_dir: a string pointing to where the model, summaries etc. will be
saved. You can also write additional files to this directory.
name: the task name.
"""
super().__init__(name=name)
self._task_config = params
self._logging_dir = logging_dir
@property
def task_config(self):
return self._task_config
@property
def logging_dir(self) -> str:
return self._logging_dir
@classmethod
def create_optimizer(cls, optimizer_config: OptimizationConfig,
runtime_config: Optional[RuntimeConfig] = None):
"""Creates an TF optimizer from configurations.
Args:
optimizer_config: the parameters of the Optimization settings.
runtime_config: the parameters of the runtime.
Returns:
A tf.optimizers.Optimizer object.
"""
opt_factory = optimization.OptimizerFactory(optimizer_config)
optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate())
# Configures the optimizer when loss_scale is set in the runtime config.
# This helps avoid overflow/underflow in float16 computations.
if runtime_config:
optimizer = performance.configure_optimizer(
optimizer,
use_float16=runtime_config.mixed_precision_dtype == "float16",
loss_scale=runtime_config.loss_scale)
return optimizer
def initialize(self, model: tf.keras.Model):
"""[Optional] A callback function used as CheckpointManager's init_fn.
This function will be called when no checkpoint is found for the model.
If there is a checkpoint, the checkpoint will be loaded and this function
will not be called. You can use this callback function to load a pretrained
checkpoint, saved under a directory other than the model_dir.
Args:
model: The keras.Model built or used by this task.
"""
ckpt_dir_or_file = self.task_config.init_checkpoint
logging.info("Trying to load pretrained checkpoint from %s",
ckpt_dir_or_file)
if tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
if not ckpt_dir_or_file:
return
if hasattr(model, "checkpoint_items"):
checkpoint_items = model.checkpoint_items
else:
checkpoint_items = dict(model=model)
ckpt = tf.train.Checkpoint(**checkpoint_items)
status = ckpt.read(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info("Finished loading pretrained checkpoint from %s",
ckpt_dir_or_file)
def build_model(self) -> tf.keras.Model:
"""[Optional] Creates model architecture.
Returns:
A model instance.
""" # pytype: disable=bad-return-type # typed-keras
@abc.abstractmethod
def build_inputs(self,
params,
input_context: Optional[tf.distribute.InputContext] = None):
"""Returns a dataset or a nested structure of dataset functions.
Dataset functions define per-host datasets with the per-replica batch size.
With distributed training, this method runs on remote hosts.
Args:
params: hyperparams to create input pipelines, which can be any of
dataclass, ConfigDict, namedtuple, etc.
input_context: optional distribution input pipeline context.
Returns:
A nested structure of per-replica input functions.
"""
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
"""Standard interface to compute losses.
Args:
labels: optional label tensors.
model_outputs: a nested structure of output tensors.
aux_losses: auxiliary loss tensors, i.e. `losses` in keras.Model.
Returns:
The total loss tensor.
"""
del model_outputs, labels
if aux_losses is None:
losses = [tf.constant(0.0, dtype=tf.float32)]
else:
losses = aux_losses
total_loss = tf.add_n(losses)
return total_loss
def build_metrics(self, training: bool = True):
"""Gets streaming metrics for training/validation."""
del training
return []
def process_metrics(self, metrics, labels, model_outputs, **kwargs):
"""Process and update metrics.
Called when using custom training loop API.
Args:
metrics: a nested structure of metrics objects. The return of function
self.build_metrics.
labels: a tensor or a nested structure of tensors.
model_outputs: a tensor or a nested structure of tensors. For example,
output of the keras model built by self.build_model.
**kwargs: other args.
"""
for metric in metrics:
metric.update_state(labels, model_outputs)
def process_compiled_metrics(self, compiled_metrics, labels, model_outputs):
"""Process and update compiled_metrics.
Called when using the compile/fit API.
Args:
compiled_metrics: the compiled metrics (model.compiled_metrics).
labels: a tensor or a nested structure of tensors.
model_outputs: a tensor or a nested structure of tensors. For example,
output of the keras model built by self.build_model.
"""
compiled_metrics.update_state(labels, model_outputs)
def train_step(self,
inputs,
model: tf.keras.Model,
optimizer: tf.keras.optimizers.Optimizer,
metrics=None):
"""Does forward and backward.
With distribution strategies, this method runs on devices.
Args:
inputs: a dictionary of input tensors.
model: the model, forward pass definition.
optimizer: the optimizer for this training step.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
if isinstance(inputs, tuple) and len(inputs) == 2:
features, labels = inputs
else:
features, labels = inputs, inputs
with tf.GradientTape() as tape:
outputs = model(features, training=True)
# Computes per-replica loss.
if model.compiled_loss:
loss = model.compiled_loss(
labels, outputs, regularization_losses=model.losses)
loss += self.build_losses(
labels=labels, model_outputs=outputs, aux_losses=None)
else:
loss = self.build_losses(
labels=labels, model_outputs=outputs, aux_losses=model.losses)
# Scales the loss, since the default gradient all-reduce performs a sum
# inside the optimizer.
scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync
# For mixed precision, when a LossScaleOptimizer is used, the loss is
# scaled to avoid numeric underflow.
if isinstance(optimizer,
tf.keras.mixed_precision.LossScaleOptimizer):
scaled_loss = optimizer.get_scaled_loss(scaled_loss)
tvars = model.trainable_variables
grads = tape.gradient(scaled_loss, tvars)
if isinstance(optimizer,
tf.keras.mixed_precision.LossScaleOptimizer):
grads = optimizer.get_unscaled_gradients(grads)
optimizer.apply_gradients(list(zip(grads, tvars)))
logs = {self.loss: loss}
if metrics:
self.process_metrics(metrics, labels, outputs)
if model.compiled_metrics:
self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
logs.update({m.name: m.result() for m in metrics or []})
logs.update({m.name: m.result() for m in model.metrics})
return logs
def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
"""Validation step.
With distribution strategies, this method runs on devices.
Args:
inputs: a dictionary of input tensors.
model: the keras.Model.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
if isinstance(inputs, tuple) and len(inputs) == 2:
features, labels = inputs
else:
features, labels = inputs, inputs
outputs = self.inference_step(features, model)
loss = self.build_losses(
labels=labels, model_outputs=outputs, aux_losses=model.losses)
logs = {self.loss: loss}
if metrics:
self.process_metrics(metrics, labels, outputs)
if model.compiled_metrics:
self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
logs.update({m.name: m.result() for m in metrics or []})
logs.update({m.name: m.result() for m in model.metrics})
return logs
def inference_step(self, inputs, model: tf.keras.Model):
"""Performs the forward step.
With distribution strategies, this method runs on devices.
Args:
inputs: a dictionary of input tensors.
model: the keras.Model.
Returns:
Model outputs.
"""
return model(inputs, training=False)
def aggregate_logs(self, state, step_logs):
"""Optional aggregation over logs returned from a validation step.
Given step_logs from a validation step, this function aggregates the logs
after each eval_step() (see eval_reduce() function in
official/core/base_trainer.py). It runs on CPU and can be used to aggregate
metrics during validation, when there are too many metrics that cannot fit
into TPU memory. Note that this may increase latency due to data transfer
between TPU and CPU. Also, the step output from a validation step may be a
tuple with elements from replicas, and a concatenation of the elements is
needed in such a case.
Args:
state: The current state of training, for example, it can be a sequence of
metrics.
step_logs: Logs from a validation step. Can be a dictionary.
"""
pass
def reduce_aggregated_logs(self,
aggregated_logs,
global_step: Optional[tf.Tensor] = None):
"""Optional reduce of aggregated logs over validation steps.
This function reduces aggregated logs at the end of validation, and can be
used to compute the final metrics. It runs on CPU and is called from each
eval_end() in the base trainer (see eval_end() in
official/core/base_trainer.py).
Args:
aggregated_logs: Aggregated logs over multiple validation steps.
global_step: An optional variable of global step.
Returns:
A dictionary of reduced results.
"""
return {}
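# Editor's note: minimal subclass sketch, not part of the original module.
# It shows the smallest surface a concrete Task usually overrides:
# build_model, build_inputs and build_losses. The toy tensors and shapes
# below are placeholders.
class _ExampleRegressionTask(Task):
  """A toy least-squares regression task, for illustration only."""

  def build_model(self) -> tf.keras.Model:
    return tf.keras.Sequential([tf.keras.layers.Dense(1)])

  def build_inputs(self, params, input_context=None):
    del params, input_context  # Unused in this sketch.
    features = tf.random.uniform([8, 4])
    labels = tf.random.uniform([8, 1])
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.repeat().batch(2)

  def build_losses(self, labels, model_outputs, aux_losses=None):
    loss = tf.reduce_mean(tf.square(labels - model_outputs))
    if aux_losses:
      loss += tf.add_n(aux_losses)
    return loss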
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Standard Trainer implementation.
The base trainer implements the Orbit `StandardTrainable` and
`StandardEvaluable` interfaces. Trainers inside this project should be
interchangeable and independent of model architectures and tasks.
"""
import functools
from typing import Union, Optional
from absl import logging
import gin
import orbit
import tensorflow as tf
from official.core import base_task
from official.core import config_definitions
from official.modeling import optimization
ExperimentConfig = config_definitions.ExperimentConfig
TrainerConfig = config_definitions.TrainerConfig
class Recovery:
"""Built-in model blowup recovery module.
Checks the loss value against the given threshold. If exceeded, recovers the
model by reading the checkpoint on disk.
"""
def __init__(self,
loss_upper_bound: float,
checkpoint_manager: tf.train.CheckpointManager,
recovery_begin_steps: int = 0,
recovery_max_trials: int = 3):
self.recover_counter = 0
self.recovery_begin_steps = recovery_begin_steps
self.recovery_max_trials = recovery_max_trials
self.loss_upper_bound = loss_upper_bound
self.checkpoint_manager = checkpoint_manager
def should_recover(self, loss_value, global_step):
if tf.math.is_nan(loss_value):
return True
if (global_step >= self.recovery_begin_steps and
loss_value > self.loss_upper_bound):
return True
return False
def maybe_recover(self, loss_value, global_step):
"""Conditionally recovers the training by triggering checkpoint restoration.
Args:
loss_value: the loss value as a float.
global_step: the number of global training steps.
Raises:
RuntimeError: when recovery happens more than the max number of trials;
the job should crash.
"""
if not self.should_recover(loss_value, global_step):
return
self.recover_counter += 1
if self.recover_counter > self.recovery_max_trials:
raise RuntimeError(
"The loss value is NaN or out of range after training loop and "
f"this happens {self.recover_counter} times.")
# Loads the previous good checkpoint.
checkpoint_path = self.checkpoint_manager.restore_or_initialize()
logging.warning(
"Recovering the model from checkpoint: %s. The loss value becomes "
"%f at step %d.", checkpoint_path, loss_value, global_step)
class _AsyncTrainer(orbit.StandardTrainer, orbit.StandardEvaluator):
"""Trainer class for both sync and async Strategy."""
def init_async(self):
"""Initializes the Async Trainer base class."""
assert isinstance(self._strategy, tf.distribute.Strategy)
self._is_async = isinstance(
self._strategy, tf.distribute.experimental.ParameterServerStrategy)
self._coordinator = None
if self._is_async:
self._coordinator = (
tf.distribute.experimental.coordinator.ClusterCoordinator(
self._strategy))
def join(self):
"""Join all async steps. Only useful in aysnc training."""
if getattr(self, "_is_async", False):
self._coordinator.join()
def create_train_loop_fn(self):
"""Creates a eval loop from the given step function and options."""
train_loop_fn = super().create_train_loop_fn()
if getattr(self, "_is_async", False):
def _async_loop_fn(iterator, num_steps):
self._coordinator.schedule(train_loop_fn, args=(iterator, num_steps))
return _async_loop_fn
else:
return train_loop_fn
def create_eval_loop_fn(self, has_state: bool):
"""Creates a training loop from the given step function and options."""
eval_loop_fn = super().create_eval_loop_fn(has_state)
if getattr(self, "_is_async", False):
if has_state:
raise ValueError(
"Stateful eval loop is not supported in async training.")
def _async_loop_fn(iterator, num_steps, state=None, reduce_fn=None):
assert state is None
assert reduce_fn is None
self._coordinator.schedule(eval_loop_fn, args=(iterator, num_steps))
return _async_loop_fn
else:
return eval_loop_fn
def distribute_dataset(self, dataset_or_fn, *args, **kwargs):
"""A utility function to help create a `tf.distribute.DistributedDataset`.
Args:
dataset_or_fn: An instance of `tf.data.Dataset`, or a "dataset function"
returning a `tf.data.Dataset`. If it is a function, it may optionally
have an argument named `input_context` which will be passed a
`tf.distribute.InputContext` instance.
*args: Any positional arguments to pass through to `dataset_or_fn`.
**kwargs: Any keyword arguments to pass through to `dataset_or_fn`.
Returns:
A distributed Dataset.
"""
if getattr(self, "_is_async", False):
per_worker_dataset_fn = functools.partial(
orbit.utils.make_distributed_dataset, self._strategy, dataset_or_fn,
*args, **kwargs)
per_worker_dataset_fn = tf.function(per_worker_dataset_fn)
return self._coordinator.create_per_worker_dataset(per_worker_dataset_fn)
else:
return orbit.utils.make_distributed_dataset(self._strategy, dataset_or_fn,
*args, **kwargs)
def get_runtime_options(config: ExperimentConfig):
"""Get tf.distribute.RunOptions from config."""
xla_options = {}
if config.runtime.tpu_enable_xla_dynamic_padder is not None:
xla_options["enable_xla_dynamic_padder"] = (
config.runtime.tpu_enable_xla_dynamic_padder)
return tf.distribute.RunOptions(
experimental_xla_options=tf.tpu.XLAOptions(**xla_options))
@gin.configurable
class Trainer(_AsyncTrainer):
"""Implements the common trainer shared for TensorFlow models."""
# pylint: disable=super-init-not-called
def __init__(
self,
config: ExperimentConfig,
task: base_task.Task,
model: tf.keras.Model,
optimizer: tf.optimizers.Optimizer,
train: bool = True,
evaluate: bool = True,
train_dataset: Optional[Union[tf.data.Dataset,
tf.distribute.DistributedDataset]] = None,
validation_dataset: Optional[Union[
tf.data.Dataset, tf.distribute.DistributedDataset]] = None,
checkpoint_exporter=None):
"""Initialize common trainer for TensorFlow models.
Args:
config: An `ExperimentConfig` instance specifying experiment config.
task: A base_task.Task instance.
model: The model instance, e.g. a tf.keras.Model instance.
optimizer: tf.optimizers.Optimizer instance.
train: bool, whether or not this trainer will be used for training.
Defaults to True.
evaluate: bool, whether or not this trainer will be used for evaluation.
Defaults to True.
train_dataset: a dataset object created for training. With tf.distribute,
it needs to be a `DistributedDataset`.
validation_dataset: a dataset object created for evaluation. With
tf.distribute, it needs to be a `DistributedDataset`. The evaluator will
create a dataset iterator for each eval round, so the dataset does not
need to repeat.
checkpoint_exporter: an object that has the `maybe_export_checkpoint`
interface.
"""
# Gets the current distribution strategy. If not inside any strategy scope,
# it gets a single-replica no-op strategy.
self._strategy = tf.distribute.get_strategy()
self._validate_params(
config,
check_train_data=train_dataset is None,
check_validation_data=validation_dataset is None)
self._config = config
self._task = task
self._model = model
self._optimizer = optimizer
self._checkpoint_exporter = checkpoint_exporter
self._recovery = None
# Runtime options are only applied to train_step.
# We use default for eval_step.
self._runtime_options = get_runtime_options(config)
# Creates a shadow copy of the weights to store weights moving average.
if isinstance(self._optimizer, optimization.ExponentialMovingAverage
) and not self._optimizer.has_shadow_copy:
self._optimizer.shadow_copy(self._model)
# global_step increases by 1 after each training iteration.
# We should have global_step.numpy() == self.optimizer.iterations.numpy()
# when there is only 1 optimizer.
self._global_step = orbit.utils.create_global_step()
if hasattr(self.model, "checkpoint_items"):
checkpoint_items = self.model.checkpoint_items
else:
checkpoint_items = {}
self._checkpoint = tf.train.Checkpoint(
global_step=self.global_step,
model=self.model,
optimizer=self.optimizer,
**checkpoint_items)
self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32)
self._validation_loss = tf.keras.metrics.Mean(
"validation_loss", dtype=tf.float32)
model_metrics = model.metrics if hasattr(model, "metrics") else []
self._train_metrics = self.task.build_metrics(
training=True) + model_metrics
self._validation_metrics = self.task.build_metrics(
training=False) + model_metrics
self.init_async()
if train:
train_dataset = train_dataset or self.distribute_dataset(
self.task.build_inputs, self.config.task.train_data)
orbit.StandardTrainer.__init__(
self,
train_dataset,
options=orbit.StandardTrainerOptions(
use_tf_while_loop=config.trainer.train_tf_while_loop,
use_tf_function=config.trainer.train_tf_function,
use_tpu_summary_optimization=config.trainer.allow_tpu_summary))
if evaluate:
validation_dataset = validation_dataset or self.distribute_dataset(
self.task.build_inputs, self.config.task.validation_data)
orbit.StandardEvaluator.__init__(
self,
validation_dataset,
options=orbit.StandardEvaluatorOptions(
use_tf_function=config.trainer.eval_tf_function,
use_tf_while_loop=config.trainer.eval_tf_while_loop))
def _validate_params(self,
config,
check_train_data=True,
check_validation_data=True):
r"""Validates if the configuration object passed to the Trainer.
The experiment configuration should be structured as:
\trainer
\task
\train_data
\validation_data
Args:
config: a namedtuple, dataclass, ConfigDict, etc.
check_train_data: whether to check task.train_data field.
check_validation_data: whether to check task.validation_data field.
"""
if not hasattr(config, "trainer"):
raise AttributeError("The trainer requires the configuration contains an"
" attribute `trainer`.")
if not hasattr(config, "task"):
raise AttributeError("The trainer requires the configuration contains an"
" attribute `task`.")
if check_train_data and not hasattr(config.task, "train_data"):
raise AttributeError("The trainer requires the configuration contains an"
" attribute `task.train_data`.")
if check_validation_data and not hasattr(config.task, "validation_data"):
raise AttributeError("The trainer requires the configuration contains an"
" attribute `task.validation_data`.")
@property
def strategy(self):
return self._strategy
@property
def config(self):
return self._config
@property
def task(self):
return self._task
@property
def model(self):
return self._model
@property
def optimizer(self):
if hasattr(self, "_optimizer"):
return self._optimizer
else:
return None
@property
def global_step(self):
return self._global_step
@property
def train_loss(self):
"""Accesses the training loss metric object."""
return self._train_loss
@property
def validation_loss(self):
"""Accesses the validation loss metric object."""
return self._validation_loss
@property
def train_metrics(self):
"""Accesses all training metric objects."""
return self._train_metrics
@property
def validation_metrics(self):
"""Accesses all validation metric metric objects."""
return self._validation_metrics
def initialize(self):
"""A callback function.
This function will be called when no checkpoint is found for the model.
If there is a checkpoint, the checkpoint will be loaded and this function
will not be called. Tasks may use this callback function to load a
pretrained checkpoint, saved under a directory other than the model_dir.
"""
self.task.initialize(self.model)
@property
def checkpoint(self):
"""Accesses the training checkpoint."""
return self._checkpoint
# TODO(yejiayu): Remove this once all deps are fixed.
def add_recovery(self, params: TrainerConfig,
checkpoint_manager: tf.train.CheckpointManager):
if params.recovery_max_trials >= 0:
self._recovery = Recovery(
loss_upper_bound=params.loss_upper_bound,
recovery_begin_steps=params.recovery_begin_steps,
recovery_max_trials=params.recovery_max_trials,
checkpoint_manager=checkpoint_manager)
def train_loop_end(self):
"""See base class."""
self.join()
logs = {}
for metric in self.train_metrics + [self.train_loss]:
logs[metric.name] = metric.result()
metric.reset_states()
if callable(self.optimizer.learning_rate):
# A self-implemented optimizer may not have `optimizer.iterations`, so
# guard the access just to be safe.
if hasattr(self.optimizer, "iterations"):
logs["learning_rate"] = self.optimizer.learning_rate(
self.optimizer.iterations)
else:
logs["learning_rate"] = self.optimizer.learning_rate(self.global_step)
else:
logs["learning_rate"] = self.optimizer.learning_rate
return logs
def train_step(self, iterator):
"""See base class."""
def step_fn(inputs):
if self.config.runtime.enable_xla and (self.config.runtime.num_gpus > 0):
task_train_step = tf.function(self.task.train_step, jit_compile=True)
else:
task_train_step = self.task.train_step
logs = task_train_step(
inputs,
model=self.model,
optimizer=self.optimizer,
metrics=self.train_metrics)
self._train_loss.update_state(logs[self.task.loss])
self.global_step.assign_add(1)
self.strategy.run(
step_fn, args=(next(iterator),), options=self._runtime_options)
def eval_begin(self):
"""Sets up metrics."""
for metric in self.validation_metrics + [self.validation_loss]:
metric.reset_states()
# Swaps weights to test on weights moving average.
if self.optimizer and isinstance(self.optimizer,
optimization.ExponentialMovingAverage):
self.optimizer.swap_weights()
def eval_step(self, iterator):
"""See base class."""
def step_fn(inputs):
logs = self.task.validation_step(
inputs, model=self.model, metrics=self.validation_metrics)
if self.task.loss in logs:
self._validation_loss.update_state(logs[self.task.loss])
return logs
distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),))
return tf.nest.map_structure(self.strategy.experimental_local_results,
distributed_outputs)
def eval_end(self, aggregated_logs=None):
"""Processes evaluation results."""
self.join()
logs = {}
for metric in self.validation_metrics:
logs[metric.name] = metric.result()
if self.validation_loss.count.numpy() != 0:
logs[self.validation_loss.name] = self.validation_loss.result()
else:
# `self.validation_loss` metric was not updated, because the validation
# loss was not returned from the task's `validation_step` method.
logging.info("The task did not report validation loss.")
if aggregated_logs:
metrics = self.task.reduce_aggregated_logs(
aggregated_logs, global_step=self.global_step)
logs.update(metrics)
if self._checkpoint_exporter:
self._checkpoint_exporter.maybe_export_checkpoint(
self.checkpoint, logs, self.global_step.numpy())
metric_name = self.config.trainer.best_checkpoint_eval_metric
logs["best_" +
metric_name] = self._checkpoint_exporter.best_ckpt_logs[metric_name]
# Swaps back weights after testing when EMA is used.
# This happens after best checkpoint export so that average weights used for
# eval are exported instead of regular weights.
if self.optimizer and isinstance(self.optimizer,
optimization.ExponentialMovingAverage):
self.optimizer.swap_weights()
return logs
def eval_reduce(self, state=None, step_outputs=None):
return self.task.aggregate_logs(state, step_outputs)
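# Editor's note: illustrative end-to-end sketch, not part of the original
# module. It assumes a concrete `task` and an `ExperimentConfig` wired like
# the unit tests below (SGD with a constant learning rate); the step count
# is a placeholder.
def _example_build_and_train(config, task):
  model = task.build_model()
  optimizer = task.create_optimizer(config.trainer.optimizer_config,
                                    config.runtime)
  trainer = Trainer(config, task, model=model, optimizer=optimizer)
  logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
  return logs  # Expected to contain 'training_loss' and 'learning_rate'.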
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_models.core.trainers.trainer."""
# pylint: disable=g-direct-tensorflow-import
import gc
import multiprocessing
import os
import sys
from absl.testing import parameterized
import orbit
import portpicker
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.core import base_trainer as trainer_lib
from official.core import config_definitions as cfg
from official.core import train_lib
from official.utils.testing import mock_task
TPU_TEST = 'test_tpu' in sys.argv[0]
GPU_TEST = 'test_gpu' in sys.argv[0]
def all_strategy_combinations():
return combinations.combine(
distribution=[
strategy_combinations.default_strategy,
strategy_combinations.cloud_tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],)
def create_in_process_cluster(num_workers, num_ps):
"""Creates and starts local servers and returns the cluster_resolver."""
worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
cluster_dict = {}
cluster_dict['worker'] = ['localhost:%s' % port for port in worker_ports]
if num_ps > 0:
cluster_dict['ps'] = ['localhost:%s' % port for port in ps_ports]
cluster_spec = tf.train.ClusterSpec(cluster_dict)
# Workers need some inter-op threads to work properly.
worker_config = tf.compat.v1.ConfigProto()
if multiprocessing.cpu_count() < num_workers + 1:
worker_config.inter_op_parallelism_threads = num_workers + 1
for i in range(num_workers):
tf.distribute.Server(
cluster_spec,
job_name='worker',
task_index=i,
config=worker_config,
protocol='grpc')
for i in range(num_ps):
tf.distribute.Server(
cluster_spec, job_name='ps', task_index=i, protocol='grpc')
cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(
cluster_spec, rpc_layer='grpc')
return cluster_resolver
def dataset_fn(input_context=None):
del input_context
def dummy_data(_):
return tf.zeros((1, 1), dtype=tf.float32)
dataset = tf.data.Dataset.range(1)
dataset = dataset.repeat()
dataset = dataset.map(
dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
return dataset
class MockAsyncTrainer(trainer_lib._AsyncTrainer):
"""Mock AsyncTrainer to test the _AsyncTrainer class."""
def __init__(self):
self._strategy = tf.distribute.get_strategy()
self.init_async()
self.global_step = tf.Variable(
0,
dtype=tf.int64,
name='global_step',
trainable=False,
aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)
self.eval_global_step = tf.Variable(
0,
dtype=tf.int64,
name='eval_global_step',
trainable=False,
aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)
train_dataset = self.distribute_dataset(dataset_fn)
orbit.StandardTrainer.__init__(
self, train_dataset, options=orbit.StandardTrainerOptions())
validation_dataset = self.distribute_dataset(dataset_fn)
orbit.StandardEvaluator.__init__(
self,
validation_dataset,
options=orbit.StandardEvaluatorOptions(use_tf_while_loop=True))
def train_loop_begin(self):
self.global_step.assign(0)
def train_step(self, iterator):
def replica_step(_):
self.global_step.assign_add(1)
self._strategy.run(replica_step, args=(next(iterator),))
def train_loop_end(self):
self.join()
return self.global_step.numpy()
def eval_begin(self):
self.eval_global_step.assign(0)
def eval_step(self, iterator):
def replica_step(_):
self.eval_global_step.assign_add(1)
self._strategy.run(replica_step, args=(next(iterator),))
def eval_end(self):
self.join()
return self.eval_global_step.numpy()
class RecoveryTest(tf.test.TestCase):
def test_recovery_module(self):
ckpt = tf.train.Checkpoint(v=tf.Variable(1, dtype=tf.int32))
model_dir = self.get_temp_dir()
manager = tf.train.CheckpointManager(ckpt, model_dir, max_to_keep=1)
recovery_module = trainer_lib.Recovery(
loss_upper_bound=1.0,
checkpoint_manager=manager,
recovery_begin_steps=1,
recovery_max_trials=1)
self.assertFalse(recovery_module.should_recover(1.1, 0))
self.assertFalse(recovery_module.should_recover(0.1, 1))
self.assertTrue(recovery_module.should_recover(1.1, 2))
# First triggers the recovery once.
recovery_module.maybe_recover(1.1, 10)
# Second time, it raises.
with self.assertRaisesRegex(
RuntimeError, 'The loss value is NaN .*'):
recovery_module.maybe_recover(1.1, 10)
class TrainerTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super().setUp()
self._config = cfg.ExperimentConfig(
trainer=cfg.TrainerConfig(
optimizer_config=cfg.OptimizationConfig({
'optimizer': {
'type': 'sgd'
},
'learning_rate': {
'type': 'constant'
}
})))
def tearDown(self):
gc.collect()
# This will only contain uncollectable garbage, i.e. reference cycles
# involving objects with __del__ defined.
self.assertEmpty(gc.garbage)
super().tearDown()
def create_test_trainer(self, config, model_dir=None, task=None):
task = task or mock_task.MockTask(config.task, logging_dir=model_dir)
ckpt_exporter = train_lib.maybe_create_best_ckpt_exporter(config, model_dir)
trainer = trainer_lib.Trainer(
config,
task,
model=task.build_model(),
optimizer=task.create_optimizer(config.trainer.optimizer_config,
config.runtime),
checkpoint_exporter=ckpt_exporter)
return trainer
@combinations.generate(all_strategy_combinations())
def test_trainer_train(self, distribution):
with distribution.scope():
trainer = self.create_test_trainer(self._config)
logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('training_loss', logs)
self.assertIn('learning_rate', logs)
@combinations.generate(all_strategy_combinations())
def test_trainer_passing_datasets(self, distribution):
with distribution.scope():
task = mock_task.MockTask(self._config)
train_dataset = orbit.utils.make_distributed_dataset(
distribution, task.build_inputs, self._config.task.train_data)
validation_dataset = orbit.utils.make_distributed_dataset(
distribution, task.build_inputs, self._config.task.validation_data)
self._config.task.train_data = None
self._config.task.validation_data = None
trainer = trainer_lib.Trainer(
self._config,
task,
model=task.build_model(),
optimizer=task.create_optimizer(self._config.trainer.optimizer_config,
self._config.runtime),
train_dataset=train_dataset,
validation_dataset=validation_dataset)
logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('training_loss', logs)
self.assertIn('learning_rate', logs)
logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('validation_loss', logs)
def test_base_async_trainer(self):
if TPU_TEST or GPU_TEST:
self.skipTest('Async training is not available on GPU/TPU.')
num_workers = 3
num_ps = 2
cluster_resolver = create_in_process_cluster(num_workers, num_ps)
distribution = tf.distribute.experimental.ParameterServerStrategy(
cluster_resolver)
with distribution.scope():
trainer = MockAsyncTrainer()
trainer.init_async()
self.assertIsInstance(
trainer._coordinator,
tf.distribute.experimental.coordinator.ClusterCoordinator)
self.assertEqual(trainer.train(tf.constant(10)), 10)
self.assertEqual(trainer.evaluate(tf.constant(11)), 11)
def test_async_trainer_train(self):
if TPU_TEST or GPU_TEST:
self.skipTest('Async training is not available on GPU/TPU.')
num_workers = 3
num_ps = 2
cluster_resolver = create_in_process_cluster(num_workers, num_ps)
distribution = tf.distribute.experimental.ParameterServerStrategy(
cluster_resolver)
with distribution.scope():
config = cfg.ExperimentConfig(**self._config.as_dict())
config.trainer.eval_tf_while_loop = True
trainer = self.create_test_trainer(config)
logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('training_loss', logs)
self.assertIn('learning_rate', logs)
def test_async_trainer_validate(self):
if TPU_TEST or GPU_TEST:
self.skipTest('Async training is not available on GPU/TPU.')
num_workers = 3
num_ps = 2
cluster_resolver = create_in_process_cluster(num_workers, num_ps)
distribution = tf.distribute.experimental.ParameterServerStrategy(
cluster_resolver)
with distribution.scope():
config = cfg.ExperimentConfig(**self._config.as_dict())
config.trainer.eval_tf_while_loop = True
trainer = self.create_test_trainer(config)
logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('acc', logs)
self.assertIn('validation_loss', logs)
@combinations.generate(all_strategy_combinations())
def test_trainer_validate(self, distribution):
with distribution.scope():
trainer = self.create_test_trainer(self._config)
logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertEqual(logs['counter'], 5. * distribution.num_replicas_in_sync)
self.assertIn('validation_loss', logs)
@combinations.generate(all_strategy_combinations())
def test_trainer_validate_without_loss(self, distribution):
class MockTaskWithoutValidationLoss(mock_task.MockTask):
def validation_step(self, inputs, model, metrics=None):
# Disable validation loss.
logs = super().validation_step(inputs, model)
del logs[self.loss]
return logs
with distribution.scope():
task = MockTaskWithoutValidationLoss()
trainer = self.create_test_trainer(self._config, task=task)
logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertEqual(logs['counter'], 5. * distribution.num_replicas_in_sync)
self.assertNotIn('validation_loss', logs)
@combinations.generate(
combinations.combine(
mixed_precision_dtype=['float32', 'bfloat16', 'float16'],
loss_scale=[None, 'dynamic', 128, 256],
))
def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
config = cfg.ExperimentConfig(
runtime=cfg.RuntimeConfig(
mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
trainer=cfg.TrainerConfig(
optimizer_config=cfg.OptimizationConfig({
'optimizer': {
'type': 'sgd'
},
'learning_rate': {
'type': 'constant'
},
})))
trainer = self.create_test_trainer(config)
if mixed_precision_dtype == 'float16':
self.assertIsInstance(trainer.optimizer,
tf.keras.mixed_precision.LossScaleOptimizer)
if loss_scale in (None, 'dynamic'):
self.assertTrue(trainer.optimizer.dynamic)
else:
self.assertFalse(trainer.optimizer.dynamic)
self.assertEqual(trainer.optimizer.initial_scale, loss_scale)
else:
self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('training_loss', metrics)
def test_export_best_ckpt(self):
config = cfg.ExperimentConfig(
trainer=cfg.TrainerConfig(
best_checkpoint_export_subdir='best_ckpt',
best_checkpoint_eval_metric='acc',
optimizer_config=cfg.OptimizationConfig({
'optimizer': {
'type': 'sgd'
},
'learning_rate': {
'type': 'constant'
}
})))
model_dir = self.get_temp_dir()
trainer = self.create_test_trainer(config, model_dir=model_dir)
trainer.train(tf.convert_to_tensor(1, dtype=tf.int32))
trainer.evaluate(tf.convert_to_tensor(1, dtype=tf.int32))
self.assertTrue(
tf.io.gfile.exists(os.path.join(model_dir, 'best_ckpt', 'info.json')))
def test_model_with_compiled_loss(self):
task = mock_task.MockTask()
model = task.build_model()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy())
trainer = trainer_lib.Trainer(
self._config,
task,
model=model,
optimizer=task.create_optimizer(self._config.trainer.optimizer_config))
logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
self.assertIn('training_loss', logs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common configuration settings."""
import dataclasses
from typing import Optional, Sequence, Union
from official.modeling.hyperparams import base_config
from official.modeling.optimization.configs import optimization_config
OptimizationConfig = optimization_config.OptimizationConfig
@dataclasses.dataclass
class DataConfig(base_config.Config):
"""The base configuration for building datasets.
Attributes:
input_path: The path to the input. It can be either (1) a str indicating a
file path/pattern, or (2) a str indicating multiple file paths/patterns
separated by commas (e.g. "a, b, c" or, without spaces, "a,b,c"), or (3) a list of
str, each of which is a file path/pattern or multiple file paths/patterns
separated by comma, or (4) a dictionary of the previous three approaches
for more advanced data mixing using named access. It should not be
specified when the following `tfds_name` is specified.
tfds_name: The name of the tensorflow dataset (TFDS). It should not be
specified when the above `input_path` is specified.
tfds_split: A str indicating which split of the data to load from TFDS. It
is required when above `tfds_name` is specified.
global_batch_size: The global batch size across all replicas.
is_training: Whether this data is used for training or not. This flag is
useful for consumers of this object to determine whether the data should
be repeated or shuffled.
drop_remainder: Whether the last batch should be dropped in the case it has
fewer than `global_batch_size` elements.
shuffle_buffer_size: The buffer size used for shuffling training data.
cache: Whether to cache dataset examples. If `True`, we will cache the
dataset after applying the decode_fn and parse_fn. It can be used to avoid
re-reading from disk, re-decoding and re-parsing the example on the second
epoch, but it requires significant memory overhead.
cycle_length: The number of files that will be processed concurrently when
interleaving files.
block_length: The number of consecutive elements to produce from each input
element before cycling to another input element when interleaving files.
deterministic: A boolean controlling whether determinism should be enforced.
sharding: Whether sharding is used in the input pipeline.
enable_tf_data_service: A boolean indicating whether to enable tf.data
service for the input pipeline.
tf_data_service_address: The URI of a tf.data service to offload
preprocessing onto during training. The URI should be in the format
"protocol://address", e.g. "grpc://tf-data-service:5050". It can be
overridden by the `FLAGS.tf_data_service` flag in the binary.
tf_data_service_job_name: The name of the tf.data service job. This argument
makes it possible for multiple datasets to share the same job. The default
behavior is that the dataset creates anonymous, exclusively owned jobs.
tfds_data_dir: A str specifying the directory to read/write TFDS data.
tfds_as_supervised: A bool. When loading dataset from TFDS, if True, the
returned tf.data.Dataset will have a 2-tuple structure (input, label)
according to builder.info.supervised_keys; if False, the default, the
returned tf.data.Dataset will have a dictionary with all the features.
tfds_skip_decoding_feature: A str to indicate which features are skipped for
decoding when loading dataset from TFDS. Use comma to separate multiple
features. The main use case is to skip the image/video decoding for better
performance.
seed: An optional seed to use for deterministic shuffling/preprocessing.
"""
input_path: Union[Sequence[str], str, base_config.Config] = ""
tfds_name: str = ""
tfds_split: str = ""
global_batch_size: int = 0
is_training: Optional[bool] = None
drop_remainder: bool = True
shuffle_buffer_size: int = 100
cache: bool = False
cycle_length: Optional[int] = None
block_length: int = 1
deterministic: Optional[bool] = None
sharding: bool = True
enable_tf_data_service: bool = False
tf_data_service_address: Optional[str] = None
tf_data_service_job_name: Optional[str] = None
tfds_data_dir: str = ""
tfds_as_supervised: bool = False
tfds_skip_decoding_feature: str = ""
seed: Optional[int] = None
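# --- Editor's illustrative sketch (not part of the original file): building a
# DataConfig per the attribute docs above. All concrete values (dataset name,
# file pattern, batch sizes) are hypothetical. Note that `tfds_name` and
# `input_path` are mutually exclusive.
_example_train_data = DataConfig(
    tfds_name="mnist",              # hypothetical TFDS dataset
    tfds_split="train",
    global_batch_size=64,
    is_training=True,
    shuffle_buffer_size=10000)
_example_eval_data = DataConfig(
    input_path="gs://my-bucket/eval*.tfrecord",  # hypothetical file pattern
    global_batch_size=64,
    is_training=False,
    drop_remainder=False)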
@dataclasses.dataclass
class RuntimeConfig(base_config.Config):
"""High-level configurations for Runtime.
These include parameters that are not directly related to the experiment,
e.g. directories, accelerator type, etc.
Attributes:
distribution_strategy: e.g. 'mirrored', 'tpu', etc.
enable_xla: Whether or not to enable XLA.
per_gpu_thread_count: thread count per GPU.
gpu_thread_mode: Whether and how the GPU device uses its own threadpool.
dataset_num_private_threads: Number of threads for a private threadpool
created for all dataset computation.
tpu: The address of the TPU to use, if any.
num_gpus: The number of GPUs to use, if any.
worker_hosts: comma-separated list of worker ip:port pairs for running
multi-worker models with DistributionStrategy.
task_index: If multi-worker training, the task index of this worker.
all_reduce_alg: Defines the algorithm for performing all-reduce.
num_packs: Sets `num_packs` in the cross device ops used in
MirroredStrategy. For details, see tf.distribute.NcclAllReduce.
mixed_precision_dtype: dtype of mixed precision policy. It can be 'float32',
'float16', or 'bfloat16'.
loss_scale: The loss scale: either 'dynamic' or a float value. This is used when
setting the mixed precision policy.
run_eagerly: Whether or not to run the experiment eagerly.
batchnorm_spatial_persistent: Whether or not to enable the spatial
persistent mode for CuDNN batch norm kernel for improved GPU performance.
"""
distribution_strategy: str = "mirrored"
enable_xla: bool = False
gpu_thread_mode: Optional[str] = None
dataset_num_private_threads: Optional[int] = None
per_gpu_thread_count: int = 0
tpu: Optional[str] = None
num_gpus: int = 0
worker_hosts: Optional[str] = None
task_index: int = -1
all_reduce_alg: Optional[str] = None
num_packs: int = 1
mixed_precision_dtype: Optional[str] = None
loss_scale: Optional[Union[str, float]] = None
run_eagerly: bool = False
batchnorm_spatial_persistent: bool = False
# XLA runtime params.
# XLA params are only applied to the train_step.
# These arguments can improve training speed. They can also improve eval, but
# may reduce usability, and users would need to make changes to their code.
# Whether to enable the XLA dynamic padder
# infrastructure to handle dynamic-shape inputs inside XLA. True by
# default. Disabling this may cause correctness issues with dynamic-shape
# inputs, as XLA will assume the inputs have padded shapes. However,
# users can optionally set it to False to improve device time if masking
# is already handled on the user side.
# If None, will respect XLA default.
tpu_enable_xla_dynamic_padder: Optional[bool] = None
# Global model parallelism configurations.
num_cores_per_replica: int = 1
default_shard_dim: int = -1
def model_parallelism(self):
return dict(
num_cores_per_replica=self.num_cores_per_replica,
default_shard_dim=self.default_shard_dim)
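# --- Editor's illustrative sketch (not part of the original file): a
# RuntimeConfig enabling float16 mixed precision with dynamic loss scaling,
# as described in the attribute docs above; the device count is hypothetical.
_example_runtime = RuntimeConfig(
    distribution_strategy="mirrored",
    num_gpus=2,
    mixed_precision_dtype="float16",
    loss_scale="dynamic")
# model_parallelism() packages the model-parallel fields; with the defaults
# above it yields num_cores_per_replica=1 and default_shard_dim=-1.
assert _example_runtime.model_parallelism() == {
    "num_cores_per_replica": 1, "default_shard_dim": -1}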
@dataclasses.dataclass
class TrainerConfig(base_config.Config):
"""Configuration for trainer.
Attributes:
optimizer_config: optimizer config, it includes optimizer, learning rate,
and warmup schedule configs.
train_tf_while_loop: whether or not to use tf while loop.
train_tf_function: whether or not to use tf_function for training loop.
eval_tf_function: whether or not to use tf_function for eval.
allow_tpu_summary: Whether to allow summaries to happen inside the XLA
program that runs on TPU, through automatic outside compilation.
steps_per_loop: number of steps per loop to report training metrics. This
can also be used to reduce host worker communication in a TPU setup.
summary_interval: number of steps between each summary.
checkpoint_interval: number of steps between checkpoints.
max_to_keep: max checkpoints to keep.
continuous_eval_timeout: maximum number of seconds to wait between
checkpoints; if set to None, continuous eval will wait indefinitely. This
is only used in continuous_train_and_eval and continuous_eval modes. The
default value is 1 hour.
train_steps: number of train steps.
validation_steps: number of eval steps. If `None`, the entire eval dataset
is used.
validation_interval: number of training steps to run between evaluations.
best_checkpoint_export_subdir: if set, the trainer will keep track of the
best evaluation metric, and export the corresponding best checkpoint under
`model_dir/best_checkpoint_export_subdir`. Note that this only works if
mode contains eval (such as `train_and_eval`, `continuous_eval`, and
`continuous_train_and_eval`).
best_checkpoint_eval_metric: for exporting the best checkpoint, which
evaluation metric the trainer should monitor. This can be any evaluation
metric that appears on TensorBoard.
best_checkpoint_metric_comp: for exporting the best checkpoint, how the
trainer should compare the evaluation metrics. This can be either `higher`
(the higher the better) or `lower` (the lower the better).
validation_summary_subdir: A str, the subdirectory for saving eval summaries.
"""
optimizer_config: OptimizationConfig = OptimizationConfig()
# Orbit settings.
train_tf_while_loop: bool = True
train_tf_function: bool = True
eval_tf_function: bool = True
eval_tf_while_loop: bool = False
allow_tpu_summary: bool = False
# Trainer intervals.
steps_per_loop: int = 1000
summary_interval: int = 1000
checkpoint_interval: int = 1000
# Checkpoint manager.
max_to_keep: int = 5
continuous_eval_timeout: int = 60 * 60
# Train/Eval routines.
train_steps: int = 0
# Set validation_steps to -1 to evaluate the entire dataset.
validation_steps: int = -1
validation_interval: int = 1000
# Best checkpoint export.
best_checkpoint_export_subdir: str = ""
best_checkpoint_eval_metric: str = ""
best_checkpoint_metric_comp: str = "higher"
# Blowup recovery.
loss_upper_bound: float = 1e6
recovery_begin_steps: int = 0 # Enforcing the loss bound after these steps.
# When max trials < 0, there is no recovery module; when max trials = 0, we
# check the condition and fail the job if it happens; when max trials > 0,
# we restore the model states.
recovery_max_trials: int = 0
validation_summary_subdir: str = "validation"
@dataclasses.dataclass
class TaskConfig(base_config.Config):
init_checkpoint: str = ""
model: Optional[base_config.Config] = None
train_data: DataConfig = DataConfig()
validation_data: DataConfig = DataConfig()
name: Optional[str] = None
@dataclasses.dataclass
class ExperimentConfig(base_config.Config):
"""Top-level configuration."""
task: TaskConfig = TaskConfig()
trainer: TrainerConfig = TrainerConfig()
runtime: RuntimeConfig = RuntimeConfig()
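# --- Editor's illustrative sketch (not part of the original file): composing
# the top-level ExperimentConfig and then overriding nested fields with the
# `override` helper inherited from base_config.Config (assumed available, as
# used elsewhere in Model Garden). All values are hypothetical.
_example_config = ExperimentConfig(
    task=TaskConfig(
        train_data=DataConfig(global_batch_size=32, is_training=True)),
    trainer=TrainerConfig(train_steps=100),
    runtime=RuntimeConfig(num_gpus=1))
_example_config.override({"trainer": {"train_steps": 200}})
assert _example_config.trainer.train_steps == 200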
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Experiment factory methods."""
from official.core import config_definitions as cfg
from official.core import registry
_REGISTERED_CONFIGS = {}
def register_config_factory(name):
"""Register ExperimentConfig factory method."""
return registry.register(_REGISTERED_CONFIGS, name)
def get_exp_config(exp_name: str) -> cfg.ExperimentConfig:
"""Looks up the `ExperimentConfig` according to the `exp_name`."""
exp_creator = registry.lookup(_REGISTERED_CONFIGS, exp_name)
return exp_creator()
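# --- Editor's illustrative sketch (not part of the original file): registering
# an experiment factory and looking it up by name. The experiment name
# 'my_experiment' is hypothetical.
@register_config_factory('my_experiment')
def _my_experiment() -> cfg.ExperimentConfig:
  return cfg.ExperimentConfig(
      trainer=cfg.TrainerConfig(train_steps=100))

assert get_exp_config('my_experiment').trainer.train_steps == 100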
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for model export."""
import abc
import functools
from typing import Any, Callable, Dict, Mapping, List, Optional, Text, Union
import tensorflow as tf
from tensorflow.python.saved_model.model_utils import export_utils
class ExportModule(tf.Module, metaclass=abc.ABCMeta):
"""Base Export Module."""
def __init__(self,
params,
model: Union[tf.Module, tf.keras.Model],
inference_step: Optional[Callable[..., Any]] = None,
*,
preprocessor: Optional[Callable[..., Any]] = None,
postprocessor: Optional[Callable[..., Any]] = None):
"""Instantiates an ExportModel.
Examples:
`inference_step` must be a function that takes `model` as a kwarg or as the
second positional argument.
```
def _inference_step(inputs, model=None):
return model(inputs, training=False)
module = ExportModule(params, model, inference_step=_inference_step)
```
`preprocessor` and `postprocessor` can be either functions or `tf.Module`
instances. How they are used is determined by the implementation of the
`serve()` method.
Args:
params: A dataclass for parameters to the module.
model: A model instance which contains weights and forward computation.
inference_step: An optional callable to forward-pass the model. If not
specified, it creates a partial function with `model` as a required
kwarg.
preprocessor: An optional callable to preprocess the inputs.
postprocessor: An optional callable to postprocess the model outputs.
"""
super().__init__(name=None)
self.model = model
self.params = params
if inference_step is not None:
self.inference_step = functools.partial(inference_step, model=self.model)
else:
self.inference_step = functools.partial(
self.model.__call__, training=False)
self.preprocessor = preprocessor
self.postprocessor = postprocessor
@abc.abstractmethod
def serve(self) -> Mapping[Text, tf.Tensor]:
"""The bare inference function which should run on all devices.
Tensors are expected to be passed in through keyword arguments. Returns a
dictionary of tensors whose keys will be used inside the SignatureDef.
"""
@abc.abstractmethod
def get_inference_signatures(
self, function_keys: Dict[Text, Text]) -> Mapping[Text, Any]:
"""Get defined function signatures."""
def export(export_module: ExportModule,
function_keys: Union[List[Text], Dict[Text, Text]],
export_savedmodel_dir: Text,
checkpoint_path: Optional[Text] = None,
timestamped: bool = True,
save_options: Optional[tf.saved_model.SaveOptions] = None) -> Text:
"""Exports to SavedModel format.
Args:
export_module: an ExportModule with the keras Model and serving tf.functions.
function_keys: a list of string keys to retrieve pre-defined serving
signatures. The signature keys will be set with defaults. If a dictionary
is provided, the values will be used as signature keys.
export_savedmodel_dir: Output saved model directory.
checkpoint_path: Object-based checkpoint path or directory.
timestamped: Whether to export the savedmodel to a timestamped directory.
save_options: `SaveOptions` for `tf.saved_model.save`.
Returns:
The savedmodel directory path.
"""
ckpt_dir_or_file = checkpoint_path
if ckpt_dir_or_file is not None and tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
if ckpt_dir_or_file:
checkpoint = tf.train.Checkpoint(model=export_module.model)
checkpoint.read(
ckpt_dir_or_file).assert_existing_objects_matched().expect_partial()
if isinstance(function_keys, list):
if len(function_keys) == 1:
function_keys = {
function_keys[0]: tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
}
else:
raise ValueError(
"If the function_keys is a list, it must contain a single element. %s"
% function_keys)
signatures = export_module.get_inference_signatures(function_keys)
if timestamped:
export_dir = export_utils.get_timestamped_export_dir(
export_savedmodel_dir).decode("utf-8")
else:
export_dir = export_savedmodel_dir
tf.saved_model.save(
export_module, export_dir, signatures=signatures, options=save_options)
return export_dir
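# --- Editor's illustrative sketch (not part of the original file): a minimal
# ExportModule subclass wired through export(), mirroring the docstrings
# above. All names here are hypothetical; the test file below exercises the
# same API in full.
class _DemoModule(ExportModule):

  @tf.function
  def serve(self, inputs):
    # inference_step defaults to model(inputs, training=False).
    return {'outputs': self.inference_step(inputs)}

  def get_inference_signatures(self, function_keys):
    spec = tf.TensorSpec(shape=[None, 4], dtype=tf.float32)
    return {'serving_default': self.serve.get_concrete_function(spec)}

# Example usage (hypothetical model and output directory):
# _module = _DemoModule(params=None, model=tf.keras.layers.Dense(2))
# export(_module, ['serving_default'], '/tmp/demo_savedmodel',
#        timestamped=False)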
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for official.core.export_base."""
import os
from typing import Any, Dict, Mapping, Text
import tensorflow as tf
from official.core import export_base
class TestModule(export_base.ExportModule):
@tf.function
def serve(self, inputs: tf.Tensor) -> Mapping[Text, tf.Tensor]:
x = inputs if self.preprocessor is None else self.preprocessor(
inputs=inputs)
x = self.inference_step(x)
x = self.postprocessor(x) if self.postprocessor else x
return {'outputs': x}
def get_inference_signatures(
self, function_keys: Dict[Text, Text]) -> Mapping[Text, Any]:
input_signature = tf.TensorSpec(shape=[None, None], dtype=tf.float32)
return {'foo': self.serve.get_concrete_function(input_signature)}
class ExportBaseTest(tf.test.TestCase):
def test_export_module(self):
tmp_dir = self.get_temp_dir()
model = tf.keras.layers.Dense(2)
inputs = tf.ones([2, 4], tf.float32)
expected_output = model(inputs, training=False)
module = TestModule(params=None, model=model)
ckpt_path = tf.train.Checkpoint(model=model).save(
os.path.join(tmp_dir, 'ckpt'))
export_dir = export_base.export(
module, ['foo'],
export_savedmodel_dir=tmp_dir,
checkpoint_path=ckpt_path,
timestamped=True)
self.assertTrue(os.path.exists(os.path.join(export_dir, 'saved_model.pb')))
self.assertTrue(
os.path.exists(
os.path.join(export_dir, 'variables', 'variables.index')))
self.assertTrue(
os.path.exists(
os.path.join(export_dir, 'variables',
'variables.data-00000-of-00001')))
imported = tf.saved_model.load(export_dir)
output = imported.signatures['foo'](inputs)
self.assertAllClose(output['outputs'].numpy(), expected_output.numpy())
def test_custom_inference_step(self):
tmp_dir = self.get_temp_dir()
model = tf.keras.layers.Dense(2)
inputs = tf.ones([2, 4], tf.float32)
def _inference_step(inputs, model):
return tf.nn.softmax(model(inputs, training=False))
module = TestModule(
params=None, model=model, inference_step=_inference_step)
expected_output = _inference_step(inputs, model)
ckpt_path = tf.train.Checkpoint(model=model).save(
os.path.join(tmp_dir, 'ckpt'))
export_dir = export_base.export(
module, ['foo'],
export_savedmodel_dir=tmp_dir,
checkpoint_path=ckpt_path,
timestamped=False)
imported = tf.saved_model.load(export_dir)
output = imported.signatures['foo'](inputs)
self.assertAllClose(output['outputs'].numpy(), expected_output.numpy())
def test_processors(self):
model = tf.Module()
inputs = tf.zeros((), tf.float32)
def _inference_step(inputs, model):
del model
return inputs + 1.0
def _preprocessor(inputs):
print(inputs)
return inputs + 0.1
module = TestModule(
params=None,
model=model,
inference_step=_inference_step,
preprocessor=_preprocessor)
output = module.serve(inputs)
self.assertAllClose(output['outputs'].numpy(), 1.1)
class _PostProcessor(tf.Module):
def __call__(self, inputs):
return inputs + 0.01
module = TestModule(
params=None,
model=model,
inference_step=_inference_step,
preprocessor=_preprocessor,
postprocessor=_PostProcessor())
output = module.serve(inputs)
self.assertAllClose(output['outputs'].numpy(), 1.11)
if __name__ == '__main__':
tf.test.main()