update TensorFlow2x test method

a32ffa95 · qianyj · e286da17 · a32ffa95 · a32ffa95 · a32ffa95
Commit a32ffa95 authored Feb 03, 2023 by qianyj
20 changed files
--- a/TensorFlow2x/ComputeVision/Classification/models-master/LICENSE
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/LICENSE
+Copyright 2016 The TensorFlow Authors.  All rights reserved.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2016, The Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/TensorFlow2x/ComputeVision/Classification/models-master/README.md
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/README.md
+![Logo](https://storage.googleapis.com/tf_model_garden/tf_model_garden_logo.png)
+
+# Welcome to the Model Garden for TensorFlow
+
+The TensorFlow Model Garden is a repository with a number of different
+implementations of state-of-the-art (SOTA) models and modeling solutions for
+TensorFlow users. We aim to demonstrate the best practices for modeling so that
+TensorFlow users can take full advantage of TensorFlow for their research and
+product development.
+
+To improve the transparency and reproducibility of our models, we also provide
+training logs on [TensorBoard.dev](https://tensorboard.dev) wherever possible,
+although not every model is suitable for this.
+
+| Directory | Description |
+|-----------|-------------|
+| [official](official) | • A collection of example implementations for SOTA models using the latest TensorFlow 2's high-level APIs<br />• Officially maintained, supported, and kept up to date with the latest TensorFlow 2 APIs by TensorFlow<br />• Reasonably optimized for fast performance while still being easy to read |
+| [research](research) | • A collection of research model implementations in TensorFlow 1 or 2 by researchers<br />• Maintained and supported by researchers |
+| [community](community) | • A curated list of GitHub repositories with machine learning models and implementations powered by TensorFlow 2 |
+| [orbit](orbit) | • A flexible and lightweight library that users can easily use or fork when writing customized training loop code in TensorFlow 2.x. It seamlessly integrates with `tf.distribute` and supports running on different device types (CPU, GPU, and TPU). |
+
+## [Announcements](https://github.com/tensorflow/models/wiki/Announcements)
+
+## Contributions
+
+[![help wanted:paper implementation](https://img.shields.io/github/issues/tensorflow/models/help%20wanted%3Apaper%20implementation)](https://github.com/tensorflow/models/labels/help%20wanted%3Apaper%20implementation)
+
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
+
+## License
+
+[Apache License 2.0](LICENSE)
+
+## Citing TensorFlow Model Garden
+
+If you use TensorFlow Model Garden in your research, please cite this repository.
+
+```
+@misc{tensorflowmodelgarden2020,
+  author = {Hongkun Yu and Chen Chen and Xianzhi Du and Yeqing Li and
+            Abdullah Rashwan and Le Hou and Pengchong Jin and Fan Yang and
+            Frederick Liu and Jaeyoun Kim and Jing Li},
+  title = {{TensorFlow Model Garden}},
+  howpublished = {\url{https://github.com/tensorflow/models}},
+  year = {2020}
+}
+```
--- a/TensorFlow2x/ComputeVision/Classification/models-master/community/README.md
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/community/README.md
+![Logo](https://storage.googleapis.com/tf_model_garden/tf_model_garden_logo.png)
+
+# TensorFlow Community Models
+
+This repository provides a curated list of GitHub repositories with machine learning models and implementations powered by TensorFlow 2.
+
+**Note**: Contributing companies or individuals are responsible for maintaining their repositories.
+
+## Computer Vision
+
+### Image Recognition
+
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [DenseNet 169](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/densenet169) | [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Inception V3](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/inceptionv3) | [Rethinking the Inception Architecture<br/>for Computer Vision](https://arxiv.org/pdf/1512.00567.pdf) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Inception V4](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/inceptionv4) | [Inception-v4, Inception-ResNet and the Impact<br/>of Residual Connections on Learning](https://arxiv.org/pdf/1602.07261) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [MobileNet V1](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/mobilenet_v1) | [MobileNets: Efficient Convolutional Neural Networks<br/>for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [ResNet 101](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet101) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [ResNet 50](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [ResNet 50v1.5](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50v1_5) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference<br/>• FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+| [EfficientNet](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Classification/ConvNets/efficientnet) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/pdf/1905.11946.pdf) | • Automatic mixed precision<br/>• Horovod Multi-GPU training (NCCL)<br/>• Multi-node training on a Pyxis/Enroot Slurm cluster<br/>• XLA | [NVIDIA](https://github.com/NVIDIA) |
+
+### Object Detection
+
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [R-FCN](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/rfcn) | [R-FCN: Object Detection<br/>via Region-based Fully Convolutional Networks](https://arxiv.org/pdf/1605.06409) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [SSD-MobileNet](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-mobilenet) | [MobileNets: Efficient Convolutional Neural Networks<br/>for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [SSD-ResNet34](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-resnet34) | [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325) | • Int8 Inference<br/>• FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+
+### Segmentation
+
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [Mask R-CNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) | • Automatic Mixed Precision<br/>• Multi-GPU training support with Horovod<br/>• TensorRT | [NVIDIA](https://github.com/NVIDIA) |
+| [U-Net Medical Image Segmentation](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) | • Automatic Mixed Precision<br/>• Multi-GPU training support with Horovod<br/>• TensorRT | [NVIDIA](https://github.com/NVIDIA) |
+
+## Natural Language Processing
+
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [BERT](https://github.com/IntelAI/models/tree/master/benchmarks/language_modeling/tensorflow/bert_large) | [BERT: Pre-training of Deep Bidirectional Transformers<br/>for Language Understanding](https://arxiv.org/pdf/1810.04805) | • FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/BERT) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/pdf/1810.04805) | • Horovod Multi-GPU<br/>• Multi-node with Horovod and Pyxis/Enroot Slurm cluster<br/>• XLA<br/>• Automatic mixed precision<br/>• LAMB | [NVIDIA](https://github.com/NVIDIA) |
+| [ELECTRA](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/ELECTRA) | [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/forum?id=r1xMH1BtvB) | • Automatic Mixed Precision<br/>• Multi-GPU training support with Horovod<br/>• Multi-node training on a Pyxis/Enroot Slurm cluster | [NVIDIA](https://github.com/NVIDIA) |
+| [GNMT](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/mlperf_gnmt) | [Google’s Neural Machine Translation System:<br/>Bridging the Gap between Human and Machine Translation](https://arxiv.org/pdf/1609.08144) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Transformer-LT (Official)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_lt_official) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Transformer-LT (MLPerf)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_mlperf) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Training | [Intel](https://github.com/IntelAI) |
+
+## Recommendation Systems
+
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [Wide & Deep](https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+| [Wide & Deep](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/WideAndDeep) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • Automatic mixed precision<br/>• Multi-GPU training support with Horovod<br/>• XLA | [NVIDIA](https://github.com/NVIDIA) |
+| [DLRM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/DLRM) | [Deep Learning Recommendation Model for Personalization and Recommendation Systems](https://arxiv.org/pdf/1906.00091.pdf) | • Automatic Mixed Precision<br/>• Hybrid-parallel multi-GPU training using Horovod all2all<br/>• Multi-node training for Pyxis/Enroot Slurm clusters<br/>• XLA<br/>• Criteo dataset preprocessing with Spark on GPU | [NVIDIA](https://github.com/NVIDIA) |
+
+## Contributions
+
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/LICENSE
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/LICENSE
+Copyright 2015 The TensorFlow Authors.  All rights reserved.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2015, The TensorFlow Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/README-TPU.md
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/README-TPU.md
+# Officially Supported TensorFlow 2.1+ Models on Cloud TPU
+
+## Natural Language Processing
+
+*   [bert](nlp/bert): A powerful pre-trained language representation model:
+    BERT, which stands for Bidirectional Encoder Representations from
+    Transformers.
+    [BERT FineTuning with Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/bert-2.x) provides step-by-step instructions on Cloud TPU training. You can look at the [BERT MNLI Tensorboard.dev metrics](https://tensorboard.dev/experiment/LijZ1IrERxKALQfr76gndA) for the MNLI fine-tuning task.
+*   [transformer](nlp/transformer): A transformer model to translate the WMT
+    English to German dataset.
+    See [Training transformer on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/transformer-2.x) for step-by-step instructions on Cloud TPU training.
+
+## Computer Vision
+
+*   [efficientnet](vision/image_classification): A family of convolutional
+    neural networks that scale by balancing network depth, width, and
+    resolution and can be used to classify ImageNet's dataset of 1000 classes.
+    See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/KnaWjrq5TXGfv0NW5m7rpg/#scalars).
+*   [mnist](vision/image_classification): A basic model to classify digits
+    from the MNIST dataset. See [Running MNIST on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/mnist-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/mIah5lppTASvrHqWrdr6NA).
+*   [mask-rcnn](vision/detection): An object detection and instance segmentation model. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/LH7k0fMsRwqUAcE09o9kPA).
+*   [resnet](vision/image_classification): A deep residual network that can
+    be used to classify ImageNet's dataset of 1000 classes.
+    See [Training ResNet on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/resnet-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/CxlDK8YMRrSpYEGtBRpOhg).
+*   [retinanet](vision/detection): A fast and powerful object detector. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/b8NRnWU3TqG6Rw0UxueU6Q).
+*   [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA).
+
+## Recommendation
+*   [dlrm](recommendation/ranking): [Deep Learning Recommendation Model for
+Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091).
+*   [dcn v2](recommendation/ranking): [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535).
+*   [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ).
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/README.md
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/README.md
+![Logo](https://storage.googleapis.com/model_garden_artifacts/TF_Model_Garden.png)
+
+# TensorFlow Official Models
+
+The TensorFlow official models are a collection of models
+that use TensorFlow’s high-level APIs.
+They are intended to be well-maintained, tested, and kept up to date
+with the latest TensorFlow API.
+
+They should also be reasonably optimized for fast performance while still
+being easy to read.
+These models are used as end-to-end tests, ensuring that the models run
+with the same or improved speed and performance with each new TensorFlow build.
+
+## More models to come!
+
+The team is actively developing new models.
+In the near future, we will add:
+
+* State-of-the-art language understanding models.
+* State-of-the-art image classification models.
+* State-of-the-art object detection and instance segmentation models.
+
+## Table of Contents
+
+- [Models and Implementations](#models-and-implementations)
+  * [Computer Vision](#computer-vision)
+    + [Image Classification](#image-classification)
+    + [Object Detection and Segmentation](#object-detection-and-segmentation)
+  * [Natural Language Processing](#natural-language-processing)
+  * [Recommendation](#recommendation)
+- [How to get started with the official models](#how-to-get-started-with-the-official-models)
+
+## Models and Implementations
+
+### Computer Vision
+
+#### Image Classification
+
+| Model | Reference (Paper) |
+|-------|-------------------|
+| [MNIST](vision/image_classification) | A basic model to classify digits from the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) |
+| [ResNet](vision/beta/MODEL_GARDEN.md) | [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) |
+| [ResNet-RS](vision/beta/MODEL_GARDEN.md) | [Revisiting ResNets: Improved Training and Scaling Strategies](https://arxiv.org/abs/2103.07579) |
+| [EfficientNet](vision/image_classification) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) |
+
+#### Object Detection and Segmentation
+
+| Model | Reference (Paper) |
+|-------|-------------------|
+| [RetinaNet](vision/beta/MODEL_GARDEN.md) | [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) |
+| [Mask R-CNN](vision/beta/MODEL_GARDEN.md) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) |
+| [ShapeMask](vision/detection) | [ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors](https://arxiv.org/abs/1904.03239) |
+| [SpineNet](vision/beta/MODEL_GARDEN.md) | [SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization](https://arxiv.org/abs/1912.05027) |
+| [Cascade RCNN-RS and RetinaNet-RS](vision/beta/MODEL_GARDEN.md) | [Simple Training Strategies and Model Scaling for Object Detection](https://arxiv.org/abs/2107.00057)|
+
+### Natural Language Processing
+
+| Model | Reference (Paper) |
+|-------|-------------------|
+| [ALBERT (A Lite BERT)](nlp/albert) | [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942) |
+| [BERT (Bidirectional Encoder Representations from Transformers)](nlp/bert) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) |
+| [NHNet (News Headline generation model)](projects/nhnet) | [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386) |
+| [Transformer](nlp/transformer) | [Attention Is All You Need](https://arxiv.org/abs/1706.03762) |
+| [XLNet](nlp/xlnet) | [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) |
+| [MobileBERT](nlp/projects/mobilebert) | [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) |
+
+### Recommendation
+
+Model                            | Reference (Paper)
+-------------------------------- | -----------------
+[DLRM](recommendation/ranking)   | [Deep Learning Recommendation Model for Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091)
+[DCN v2](recommendation/ranking) | [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535)
+[NCF](recommendation)            | [Neural Collaborative Filtering](https://arxiv.org/abs/1708.05031)
+
+## How to get started with the official models
+
+* The models in the master branch are developed using TensorFlow 2,
+and they target the TensorFlow [nightly binaries](https://github.com/tensorflow/tensorflow#installation)
+built from the
+[master branch of TensorFlow](https://github.com/tensorflow/tensorflow/tree/master).
+* The stable versions targeting releases of TensorFlow are available
+as tagged branches or [downloadable releases](https://github.com/tensorflow/models/releases).
+* Model repository version numbers match the target TensorFlow release,
+such that
+[release v2.5.0](https://github.com/tensorflow/models/releases/tag/v2.5.0)
+is compatible with
+[TensorFlow v2.5.0](https://github.com/tensorflow/tensorflow/releases/tag/v2.5.0).
+
+Please follow the below steps before running models in this repository.
+
+### Requirements
+
+* The latest TensorFlow Model Garden release and TensorFlow 2
+  * If you are on a version of TensorFlow earlier than 2.2, please
+upgrade your TensorFlow to [the latest TensorFlow 2](https://www.tensorflow.org/install/).
+
+```shell
+pip3 install tf-nightly
+```
+
+* Python 3.7+
+
+Our integration tests run with Python 3.7. Although Python 3.6 should work, we
+don't recommend earlier versions.
+
+### Installation
+
+#### Method 1: Install the TensorFlow Model Garden pip package
+
+**tf-models-official** is the stable Model Garden package.
+pip will install all models and dependencies automatically.
+
+```shell
+pip install tf-models-official
+```
+
+If you are using nlp packages, please also install **tensorflow-text**:
+
+```shell
+pip install tensorflow-text
+```
+
+Please check out our [example](colab/fine_tuning_bert.ipynb)
+to learn how to use a PIP package.
+
+Note that **tf-models-official** may not include the latest changes in this
+GitHub repo. To include the latest changes, you may install **tf-models-nightly**,
+which is the nightly Model Garden package, built automatically every day.
+
+```shell
+pip install tf-models-nightly
+```
+
+#### Method 2: Clone the source
+
+1. Clone the GitHub repository:
+
+```shell
+git clone https://github.com/tensorflow/models.git
+```
+
+2. Add the top-level ***/models*** folder to the Python path.
+
+```shell
+export PYTHONPATH=$PYTHONPATH:/path/to/models
+```
+
+If you are using a Colab notebook, please set the Python path with os.environ.
+
+```python
+import os
+os.environ['PYTHONPATH'] += ":/path/to/models"
+```
+
+3. Install other dependencies
+
+```shell
+pip3 install --user -r official/requirements.txt
+```
+
+Finally, if you are using nlp packages, please also install
+**tensorflow-text-nightly**:
+
+```shell
+pip3 install tensorflow-text-nightly
+```
+
+## Contributions
+
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/__init__.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/__init__.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/colab/decoding_api_in_tf_nlp.ipynb
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/colab/decoding_api_in_tf_nlp.ipynb
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vXLA5InzXydn"
+      },
+      "source": [
+        "##### Copyright 2021 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "RuRlpLL-X0R_"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fsACVQpVSifi"
+      },
+      "source": [
+        "### Install the TensorFlow Model Garden pip package\n",
+        "\n",
+        "*  `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n",
+        "which is the nightly Model Garden package created daily automatically.\n",
+        "*  pip will install all models and dependencies automatically."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hYEwGTeCXnnX"
+      },
+      "source": [
+        "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/tutorials/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "\u003c/table\u003e"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "2j-xhrsVQOQT"
+      },
+      "outputs": [],
+      "source": [
+        "pip install  tf-models-nightly"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "BjP7zwxmskpY"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "from official import nlp\n",
+        "from official.nlp.modeling.ops import sampling_module\n",
+        "from official.nlp.modeling.ops import beam_search"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0AWgyo-IQ5sP"
+      },
+      "source": [
+        "# Decoding API\n",
+        "This API provides an interface to experiment with different decoding strategies used for auto-regressive models.\n",
+        "\n",
+        "1. The following sampling strategies are provided in sampling_module.py, which inherits from the base Decoding class:\n",
+        "  *   [top_p](https://arxiv.org/abs/1904.09751) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L65) \n",
+        "\n",
+        "      This implementation chooses the most probable logits with cumulative probabilities up to top_p.\n",
+        "\n",
+        "  *   [top_k](https://arxiv.org/pdf/1805.04833.pdf) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L48)\n",
+        "\n",
+        "      At each timestep, this implementation samples from the top-k logits based on their probability distribution.\n",
+        "\n",
+        "  *   Greedy : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L26)\n",
+        "\n",
+        "      This implementation returns the top logits based on probabilities.\n",
+        "\n",
+        "2. Beam search is provided in beam_search.py. [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search.py)\n",
+        "\n",
+        "      This implementation reduces the risk of missing hidden high probability logits by keeping the most likely num_beams of logits at each time step and eventually choosing the logits that have the overall highest probability."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MfOj7oaBRQnS"
+      },
+      "source": [
+        "## Initialize Sampling Module in TF-NLP.\n",
+        "\n",
+        "\n",
+        "\u003e **symbols_to_logits_fn** : This is a closure implemented by the users of the API. The input to this closure will be  \n",
+        "```\n",
+        "Args:\n",
+        "  1] ids [batch_size, .. (index + 1 or 1 if padded_decode is True)],\n",
+        "  2] index [scalar] : current decoded step,\n",
+        "  3] cache [nested dictionary of tensors].\n",
+        "Returns:\n",
+        "  1] tensor for next-step logits [batch_size, vocab]\n",
+        "  2] the updated_cache [nested dictionary of tensors].\n",
+        "```\n",
+        "This closure calls the model to predict the logits for the 'index+1' step. The cache is used for faster decoding.\n",
+        "Here is a [reference](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search_test.py#L88) implementation for the above closure.\n",
+        "\n",
+        "\n",
+        "\u003e **length_normalization_fn** : Closure for returning length normalization parameter.\n",
+        "```\n",
+        "Args: \n",
+        "  1] length : scalar for decoded step index.\n",
+        "  2] dtype : data-type of output tensor\n",
+        "Returns:\n",
+        "  1] value of length normalization factor.\n",
+        "Example :\n",
+        "  def _length_norm(length, dtype):\n",
+        "    return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)\n",
+        "```\n",
+        "\n",
+        "\u003e **vocab_size** : Output vocabulary size.\n",
+        "\n",
+        "\u003e **max_decode_length** : Scalar for total number of decoding steps.\n",
+        "\n",
+        "\u003e **eos_id** : Decoding will stop if all output decoded ids in the batch have this ID.\n",
+        "\n",
+        "\u003e **padded_decode** : Set this to True if running on TPU. Tensors are padded to max_decode_length if this is True.\n",
+        "\n",
+        "\u003e **top_k** : top_k is enabled if this value is \u003e 1.\n",
+        "\n",
+        "\u003e **top_p** : top_p is enabled if this value is \u003e 0 and \u003c 1.0\n",
+        "\n",
+        "\u003e **sample_temperature** : This is used to re-estimate the softmax output. Temperature skews the distribution towards high probability tokens and lowers the mass in tail distribution. Value has to be positive. Low temperature is equivalent to greedy and makes the distribution sharper, while high temperature makes it more flat.\n",
+        "\n",
+        "\u003e **enable_greedy** : By default, this is true and greedy decoding is enabled.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "lV1RRp6ihnGX"
+      },
+      "source": [
+        "# Initialize the Model Hyper-parameters"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eTsGp2gaKLdE"
+      },
+      "outputs": [],
+      "source": [
+        "params = {}\n",
+        "params['num_heads'] = 2\n",
+        "params['num_layers'] = 2\n",
+        "params['batch_size'] = 2\n",
+        "params['n_dims'] = 256\n",
+        "params['max_decode_length'] = 4"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "UGvmd0_dRFYI"
+      },
+      "source": [
+        "## What is a Cache?\n",
+        "In auto-regressive architectures like Transformer based [Encoder-Decoder](https://arxiv.org/abs/1706.03762) models, \n",
+        "Cache is used for fast sequential decoding.\n",
+        "It is a nested dictionary storing pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention blocks) for every layer.\n",
+        "\n",
+        "```\n",
+        "{\n",
+        "    'layer_%d' % layer: {\n",
+        "        'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32),\n",
+        "        'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32)\n",
+        "        } for layer in range(params['num_layers']),\n",
+        "    'model_specific_item' : Model specific tensor shape,\n",
+        "}\n",
+        "\n",
+        "```"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "CYXkoplAij01"
+      },
+      "source": [
+        "# Initialize cache. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "D6kfZOOKgkm1"
+      },
+      "outputs": [],
+      "source": [
+        "cache = {\n",
+        "    'layer_%d' % layer: {\n",
+        "        'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32),\n",
+        "        'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32)\n",
+        "        } for layer in range(params['num_layers'])\n",
+        "    }\n",
+        "print(\"cache key shape for layer 1 :\", cache['layer_1']['k'].shape)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nNY3Xn8SiblP"
+      },
+      "source": [
+        "# Define closure for length normalization. **optional.**\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "T92ccAzlnGqh"
+      },
+      "outputs": [],
+      "source": [
+        "def length_norm(length, dtype):\n",
+        "  \"\"\"Return length normalization factor.\"\"\"\n",
+        "  return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "syl7I5nURPgW"
+      },
+      "source": [
+        "# Create model_fn\n",
+        "  In practice, this will be replaced by an actual model implementation such as [here](https://github.com/tensorflow/models/blob/master/official/nlp/transformer/transformer.py#L236)\n",
+        "```\n",
+        "Args:\n",
+        "i : Step that is being decoded.\n",
+        "Returns:\n",
+        "  logit probabilities of size [batch_size, 1, vocab_size]\n",
+        "```\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "AhzSkRisRdB6"
+      },
+      "outputs": [],
+      "source": [
+        "probabilities = tf.constant([[[0.3, 0.4, 0.3], [0.3, 0.3, 0.4],\n",
+        "                              [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],\n",
+        "                            [[0.2, 0.5, 0.3], [0.2, 0.7, 0.1],\n",
+        "                              [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]])\n",
+        "def model_fn(i):\n",
+        "  return probabilities[:, i, :]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DBMUkaVmVZBg"
+      },
+      "source": [
+        "# Initialize symbols_to_logits_fn\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "FAJ4CpbfVdjr"
+      },
+      "outputs": [],
+      "source": [
+        "def _symbols_to_logits_fn():\n",
+        "  \"\"\"Calculates logits of the next tokens.\"\"\"\n",
+        "  def symbols_to_logits_fn(ids, i, temp_cache):\n",
+        "    del ids\n",
+        "    logits = tf.cast(tf.math.log(model_fn(i)), tf.float32)\n",
+        "    return logits, temp_cache\n",
+        "  return symbols_to_logits_fn"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "R_tV3jyWVL47"
+      },
+      "source": [
+        "# Greedy \n",
+        "Greedy decoding selects the token id with the highest probability as its next id: $id_t = argmax_{w}P(w | id_{1:t-1})$ at each timestep $t$. The following sketch shows greedy decoding. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "aGt9idSkVQEJ"
+      },
+      "outputs": [],
+      "source": [
+        "greedy_obj = sampling_module.SamplingModule(\n",
+        "    length_normalization_fn=None,\n",
+        "    dtype=tf.float32,\n",
+        "    symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
+        "    vocab_size=3,\n",
+        "    max_decode_length=params['max_decode_length'],\n",
+        "    eos_id=10,\n",
+        "    padded_decode=False)\n",
+        "ids, _ = greedy_obj.generate(\n",
+        "    initial_ids=tf.constant([9, 1]), initial_cache=cache)\n",
+        "print(\"Greedy Decoded Ids:\", ids)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "s4pTTsQXVz5O"
+      },
+      "source": [
+        "# top_k sampling\n",
+        "In *Top-K* sampling, the *K* most likely next token ids are filtered and the probability mass is redistributed among only those *K* ids. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "pCLWIn6GV5_G"
+      },
+      "outputs": [],
+      "source": [
+        "top_k_obj = sampling_module.SamplingModule(\n",
+        "    length_normalization_fn=length_norm,\n",
+        "    dtype=tf.float32,\n",
+        "    symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
+        "    vocab_size=3,\n",
+        "    max_decode_length=params['max_decode_length'],\n",
+        "    eos_id=10,\n",
+        "    sample_temperature=tf.constant(1.0),\n",
+        "    top_k=tf.constant(3),\n",
+        "    padded_decode=False,\n",
+        "    enable_greedy=False)\n",
+        "ids, _ = top_k_obj.generate(\n",
+        "    initial_ids=tf.constant([9, 1]), initial_cache=cache)\n",
+        "print(\"top-k sampled Ids:\", ids)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Jp3G-eE_WI4Y"
+      },
+      "source": [
+        "# top_p sampling\n",
+        "Instead of sampling only from the most likely *K* token ids, *Top-p* sampling chooses from the smallest possible set of ids whose cumulative probability exceeds the probability *p*."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "rEGdIWcuWILO"
+      },
+      "outputs": [],
+      "source": [
+        "top_p_obj = sampling_module.SamplingModule(\n",
+        "    length_normalization_fn=length_norm,\n",
+        "    dtype=tf.float32,\n",
+        "    symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
+        "    vocab_size=3,\n",
+        "    max_decode_length=params['max_decode_length'],\n",
+        "    eos_id=10,\n",
+        "    sample_temperature=tf.constant(1.0),\n",
+        "    top_p=tf.constant(0.9),\n",
+        "    padded_decode=False,\n",
+        "    enable_greedy=False)\n",
+        "ids, _ = top_p_obj.generate(\n",
+        "    initial_ids=tf.constant([9, 1]), initial_cache=cache)\n",
+        "print(\"top-p sampled Ids:\", ids)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2hcuyJ2VWjDz"
+      },
+      "source": [
+        "# Beam search decoding\n",
+        "Beam search reduces the risk of missing hidden high probability token ids by keeping the most likely num_beams of hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cJ3WzvSrWmSA"
+      },
+      "outputs": [],
+      "source": [
+        "beam_size = 2\n",
+        "params['batch_size'] = 1\n",
+        "beam_cache = {\n",
+        "    'layer_%d' % layer: {\n",
+        "        'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32),\n",
+        "        'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32)\n",
+        "        } for layer in range(params['num_layers'])\n",
+        "    }\n",
+        "print(\"cache key shape for layer 1 :\", beam_cache['layer_1']['k'].shape)\n",
+        "ids, _ = beam_search.sequence_beam_search(\n",
+        "    symbols_to_logits_fn=_symbols_to_logits_fn(),\n",
+        "    initial_ids=tf.constant([9], tf.int32),\n",
+        "    initial_cache=beam_cache,\n",
+        "    vocab_size=3,\n",
+        "    beam_size=beam_size,\n",
+        "    alpha=0.6,\n",
+        "    max_decode_length=params['max_decode_length'],\n",
+        "    eos_id=10,\n",
+        "    padded_decode=False,\n",
+        "    dtype=tf.float32)\n",
+        "print(\"Beam search ids:\", ids)"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [],
+      "name": "decoding_api_in_tf_nlp.ipynb",
+      "provenance": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/colab/nlp/customize_encoder.ipynb
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/colab/nlp/customize_encoder.ipynb
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Customizing a Transformer Encoder",
+      "private_outputs": true,
+      "provenance": [],
+      "collapsed_sections": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Bp8t2AI8i7uP"
+      },
+      "source": [
+        "##### Copyright 2020 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "cellView": "form",
+        "id": "rxPj2Lsni9O4"
+      },
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6xS-9i5DrRvO"
+      },
+      "source": [
+        "# Customizing a Transformer Encoder"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Mwb9uw1cDXsa"
+      },
+      "source": [
+        "<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://www.tensorflow.org/official_models/nlp/customize_encoder\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/nlp/customize_encoder.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n",
+        "  </td>\n",
+        "</table>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "iLrcV4IyrcGX"
+      },
+      "source": [
+        "## Learning objectives\n",
+        "\n",
+        "The [TensorFlow Models NLP library](https://github.com/tensorflow/models/tree/master/official/nlp/modeling) is a collection of tools for building and training modern high performance natural language models.\n",
+        "\n",
+        "The [TransformEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py) is the core of this library, and lots of new network architectures are proposed to improve the encoder. In this Colab notebook, we will learn how to customize the encoder to employ new network architectures."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YYxdyoWgsl8t"
+      },
+      "source": [
+        "## Install and import"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fEJSFutUsn_h"
+      },
+      "source": [
+        "### Install the TensorFlow Model Garden pip package\n",
+        "\n",
+        "*  `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n",
+        "which is the nightly Model Garden package created daily automatically.\n",
+        "*  `pip` will install all models and dependencies automatically."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "thsKZDjhswhR"
+      },
+      "source": [
+        "!pip install -q tf-models-official==2.4.0"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hpf7JPCVsqtv"
+      },
+      "source": [
+        "### Import Tensorflow and other libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "my4dp-RMssQe"
+      },
+      "source": [
+        "import numpy as np\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "from official.modeling import activations\n",
+        "from official.nlp import modeling\n",
+        "from official.nlp.modeling import layers, losses, models, networks"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vjDmVsFfs85n"
+      },
+      "source": [
+        "## Canonical BERT encoder\n",
+        "\n",
+        "Before learning how to customize the encoder, let's first create a canonical BERT encoder and use it to instantiate a `BertClassifier` for a classification task."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Oav8sbgstWc-"
+      },
+      "source": [
+        "cfg = {\n",
+        "    \"vocab_size\": 100,\n",
+        "    \"hidden_size\": 32,\n",
+        "    \"num_layers\": 3,\n",
+        "    \"num_attention_heads\": 4,\n",
+        "    \"intermediate_size\": 64,\n",
+        "    \"activation\": activations.gelu,\n",
+        "    \"dropout_rate\": 0.1,\n",
+        "    \"attention_dropout_rate\": 0.1,\n",
+        "    \"max_sequence_length\": 16,\n",
+        "    \"type_vocab_size\": 2,\n",
+        "    \"initializer\": tf.keras.initializers.TruncatedNormal(stddev=0.02),\n",
+        "}\n",
+        "bert_encoder = modeling.networks.BertEncoder(**cfg)\n",
+        "\n",
+        "def build_classifier(bert_encoder):\n",
+        "  return modeling.models.BertClassifier(bert_encoder, num_classes=2)\n",
+        "\n",
+        "canonical_classifier_model = build_classifier(bert_encoder)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Qe2UWI6_tsHo"
+      },
+      "source": [
+        "`canonical_classifier_model` can be trained using the training data. For details about how to train the model, please see the colab [fine_tuning_bert.ipynb](https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb). We skip the code that trains the model here.\n",
+        "\n",
+        "After training, we can apply the model to do prediction.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "csED2d-Yt5h6"
+      },
+      "source": [
+        "def predict(model):\n",
+        "  batch_size = 3\n",
+        "  np.random.seed(0)\n",
+        "  word_ids = np.random.randint(\n",
+        "      cfg[\"vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n",
+        "  mask = np.random.randint(2, size=(batch_size, cfg[\"max_sequence_length\"]))\n",
+        "  type_ids = np.random.randint(\n",
+        "      cfg[\"type_vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n",
+        "  print(model([word_ids, mask, type_ids], training=False))\n",
+        "\n",
+        "predict(canonical_classifier_model)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PzKStEK9t_Pb"
+      },
+      "source": [
+        "## Customize BERT encoder\n",
+        "\n",
+        "One BERT encoder consists of an embedding network and multiple transformer blocks, and each transformer block contains an attention layer and a feedforward layer."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "rmwQfhj6fmKz"
+      },
+      "source": [
+        "We provide easy ways to customize each of those components via (1)\n",
+        "[EncoderScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py) and (2) [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py)."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "xsMgEVHAui11"
+      },
+      "source": [
+        "### Use EncoderScaffold\n",
+        "\n",
+        "`EncoderScaffold` allows users to provide a custom embedding subnetwork\n",
+        "  (which will replace the standard embedding logic) and/or a custom hidden layer class (which will replace the `Transformer` instantiation in the encoder)."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-JBabpa2AOz8"
+      },
+      "source": [
+        "#### Without Customization\n",
+        "\n",
+        "Without any customization, `EncoderScaffold` behaves the same as the canonical `BertEncoder`.\n",
+        "\n",
+        "As shown in the following example, `EncoderScaffold` can load `BertEncoder`'s weights and output the same values:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ktNzKuVByZQf"
+      },
+      "source": [
+        "default_hidden_cfg = dict(\n",
+        "    num_attention_heads=cfg[\"num_attention_heads\"],\n",
+        "    intermediate_size=cfg[\"intermediate_size\"],\n",
+        "    intermediate_activation=activations.gelu,\n",
+        "    dropout_rate=cfg[\"dropout_rate\"],\n",
+        "    attention_dropout_rate=cfg[\"attention_dropout_rate\"],\n",
+        "    kernel_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n",
+        ")\n",
+        "default_embedding_cfg = dict(\n",
+        "    vocab_size=cfg[\"vocab_size\"],\n",
+        "    type_vocab_size=cfg[\"type_vocab_size\"],\n",
+        "    hidden_size=cfg[\"hidden_size\"],\n",
+        "    initializer=tf.keras.initializers.TruncatedNormal(0.02),\n",
+        "    dropout_rate=cfg[\"dropout_rate\"],\n",
+        "    max_seq_length=cfg[\"max_sequence_length\"]\n",
+        ")\n",
+        "default_kwargs = dict(\n",
+        "    hidden_cfg=default_hidden_cfg,\n",
+        "    embedding_cfg=default_embedding_cfg,\n",
+        "    num_hidden_instances=cfg[\"num_layers\"],\n",
+        "    pooled_output_dim=cfg[\"hidden_size\"],\n",
+        "    return_all_layer_outputs=True,\n",
+        "    pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n",
+        ")\n",
+        "\n",
+        "encoder_scaffold = modeling.networks.EncoderScaffold(**default_kwargs)\n",
+        "classifier_model_from_encoder_scaffold = build_classifier(encoder_scaffold)\n",
+        "classifier_model_from_encoder_scaffold.set_weights(\n",
+        "    canonical_classifier_model.get_weights())\n",
+        "predict(classifier_model_from_encoder_scaffold)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "sMaUmLyIuwcs"
+      },
+      "source": [
+        "#### Customize Embedding\n",
+        "\n",
+        "Next, we show how to use a customized embedding network.\n",
+        "\n",
+        "We firstly build an embedding network that will replace the default network. This one will have 2 inputs (`mask` and `word_ids`) instead of 3, and won't use positional embeddings."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "LTinnaG6vcsw"
+      },
+      "source": [
+        "word_ids = tf.keras.layers.Input(\n",
+        "    shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_word_ids\")\n",
+        "mask = tf.keras.layers.Input(\n",
+        "    shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_mask\")\n",
+        "embedding_layer = modeling.layers.OnDeviceEmbedding(\n",
+        "    vocab_size=cfg['vocab_size'],\n",
+        "    embedding_width=cfg['hidden_size'],\n",
+        "    initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),\n",
+        "    name=\"word_embeddings\")\n",
+        "word_embeddings = embedding_layer(word_ids)\n",
+        "attention_mask = layers.SelfAttentionMask()([word_embeddings, mask])\n",
+        "new_embedding_network = tf.keras.Model([word_ids, mask],\n",
+        "                                       [word_embeddings, attention_mask])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HN7_yu-6O3qI"
+      },
+      "source": [
+        "Inspecting `new_embedding_network`, we can see it takes two inputs:\n",
+        "`input_word_ids` and `input_mask`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "fO9zKFE4OpHp"
+      },
+      "source": [
+        "tf.keras.utils.plot_model(new_embedding_network, show_shapes=True, dpi=48)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9cOaGQHLv12W"
+      },
+      "source": [
+        "We then can build a new encoder using the above `new_embedding_network`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "mtFDMNf2vIl9"
+      },
+      "source": [
+        "kwargs = dict(default_kwargs)\n",
+        "\n",
+        "# Use new embedding network.\n",
+        "kwargs['embedding_cls'] = new_embedding_network\n",
+        "kwargs['embedding_data'] = embedding_layer.embeddings\n",
+        "\n",
+        "encoder_with_customized_embedding = modeling.networks.EncoderScaffold(**kwargs)\n",
+        "classifier_model = build_classifier(encoder_with_customized_embedding)\n",
+        "# ... Train the model ...\n",
+        "print(classifier_model.inputs)\n",
+        "\n",
+        "# Assert that there are only two inputs.\n",
+        "assert len(classifier_model.inputs) == 2"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Z73ZQDtmwg9K"
+      },
+      "source": [
+        "#### Customized Transformer\n",
+        "\n",
+        "Users can also override the [hidden_cls](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/encoder_scaffold.py#L103) argument in `EncoderScaffold`'s constructor to employ a customized Transformer layer.\n",
+        "\n",
+        "See [ReZeroTransformer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/rezero_transformer.py) for how to implement a customized Transformer layer.\n",
+        "\n",
+        "Following is an example of using `ReZeroTransformer`:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uAIarLZgw6pA"
+      },
+      "source": [
+        "kwargs = dict(default_kwargs)\n",
+        "\n",
+        "# Use ReZeroTransformer.\n",
+        "kwargs['hidden_cls'] = modeling.layers.ReZeroTransformer\n",
+        "\n",
+        "encoder_with_rezero_transformer = modeling.networks.EncoderScaffold(**kwargs)\n",
+        "classifier_model = build_classifier(encoder_with_rezero_transformer)\n",
+        "# ... Train the model ...\n",
+        "predict(classifier_model)\n",
+        "\n",
+        "# Assert that the variable `rezero_alpha` from ReZeroTransformer exists.\n",
+        "assert 'rezero_alpha' in ''.join([x.name for x in classifier_model.trainable_weights])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6PMHFdvnxvR0"
+      },
+      "source": [
+        "### Use [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py)\n",
+        "\n",
+        "The above method of customizing `Transformer` requires rewriting the whole `Transformer` layer, while sometimes you may only want to customize either attention layer or feedforward block. In this case, [TransformerScaffold](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py) can be used.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "D6FejlgwyAy_"
+      },
+      "source": [
+        "#### Customize Attention Layer\n",
+        "\n",
+        "Users can also override the [attention_cls](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_scaffold.py#L45) argument in `TransformerScaffold`'s constructor to employ a customized Attention layer.\n",
+        "\n",
+        "See [TalkingHeadsAttention](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/talking_heads_attention.py) for how to implement a customized `Attention` layer.\n",
+        "\n",
+        "Following is an example of using [TalkingHeadsAttention](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/talking_heads_attention.py):"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nFrSMrZuyNeQ"
+      },
+      "source": [
+        "# Use TalkingHeadsAttention\n",
+        "hidden_cfg = dict(default_hidden_cfg)\n",
+        "hidden_cfg['attention_cls'] = modeling.layers.TalkingHeadsAttention\n",
+        "\n",
+        "kwargs = dict(default_kwargs)\n",
+        "kwargs['hidden_cls'] = modeling.layers.TransformerScaffold\n",
+        "kwargs['hidden_cfg'] = hidden_cfg\n",
+        "\n",
+        "encoder = modeling.networks.EncoderScaffold(**kwargs)\n",
+        "classifier_model = build_classifier(encoder)\n",
+        "# ... Train the model ...\n",
+        "predict(classifier_model)\n",
+        "\n",
+        "# Assert that the variable `pre_softmax_weight` from TalkingHeadsAttention exists.\n",
+        "assert 'pre_softmax_weight' in ''.join([x.name for x in classifier_model.trainable_weights])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kuEJcTyByVvI"
+      },
+      "source": [
+        "#### Customize Feedforward Layer\n",
+        "\n",
+        "Similarly, one could also customize the feedforward layer.\n",
+        "\n",
+        "See [GatedFeedforward](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/gated_feedforward.py) for how to implement a customized feedforward layer.\n",
+        "\n",
+        "Following is an example of using [GatedFeedforward](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/gated_feedforward.py)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "XAbKy_l4y_-i"
+      },
+      "source": [
+        "# Use GatedFeedforward\n",
+        "hidden_cfg = dict(default_hidden_cfg)\n",
+        "hidden_cfg['feedforward_cls'] = modeling.layers.GatedFeedforward\n",
+        "\n",
+        "kwargs = dict(default_kwargs)\n",
+        "kwargs['hidden_cls'] = modeling.layers.TransformerScaffold\n",
+        "kwargs['hidden_cfg'] = hidden_cfg\n",
+        "\n",
+        "encoder_with_gated_feedforward = modeling.networks.EncoderScaffold(**kwargs)\n",
+        "classifier_model = build_classifier(encoder_with_gated_feedforward)\n",
+        "# ... Train the model ...\n",
+        "predict(classifier_model)\n",
+        "\n",
+        "# Assert that the variable `gate` from GatedFeedforward exists.\n",
+        "assert 'gate' in ''.join([x.name for x in classifier_model.trainable_weights])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "a_8NWUhkzeAq"
+      },
+      "source": [
+        "### Build a new Encoder using building blocks from KerasBERT.\n",
+        "\n",
+        "Finally, you could also build a new encoder using building blocks in the modeling library.\n",
+        "\n",
+        "See [AlbertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/albert_encoder.py) as an example:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xsiA3RzUzmUM"
+      },
+      "source": [
+        "albert_encoder = modeling.networks.AlbertEncoder(**cfg)\n",
+        "classifier_model = build_classifier(albert_encoder)\n",
+        "# ... Train the model ...\n",
+        "predict(classifier_model)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MeidDfhlHKSO"
+      },
+      "source": [
+        "Inspecting the `albert_encoder`, we see it stacks the same `Transformer` layer multiple times."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Uv_juT22HERW"
+      },
+      "source": [
+        "tf.keras.utils.plot_model(albert_encoder, show_shapes=True, dpi=48)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/colab/nlp/nlp_modeling_library_intro.ipynb
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/colab/nlp/nlp_modeling_library_intro.ipynb
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "80xnUmoI7fBX"
+      },
+      "source": [
+        "##### Copyright 2020 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "8nvTnfs6Q692"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WmfcMK5P5C1G"
+      },
+      "source": [
+        "# Introduction to the TensorFlow Models NLP library"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cH-oJ8R6AHMK"
+      },
+      "source": [
+        "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/nlp/nlp_modeling_library_intro\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/nlp/nlp_modeling_library_intro.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "\u003c/table\u003e"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0H_EFIhq4-MJ"
+      },
+      "source": [
+        "## Learning objectives\n",
+        "\n",
+        "In this Colab notebook, you will learn how to build transformer-based models for common NLP tasks including pretraining, span labelling and classification using the building blocks from [NLP modeling library](https://github.com/tensorflow/models/tree/master/official/nlp/modeling)."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2N97-dps_nUk"
+      },
+      "source": [
+        "## Install and import"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "459ygAVl_rg0"
+      },
+      "source": [
+        "### Install the TensorFlow Model Garden pip package\n",
+        "\n",
+        "*  `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n",
+        "which is the nightly Model Garden package created daily automatically.\n",
+        "*  `pip` will install all models and dependencies automatically."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Y-qGkdh6_sZc"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -q tf-models-official==2.4.0"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "e4huSSwyAG_5"
+      },
+      "source": [
+        "### Import Tensorflow and other libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "jqYXqtjBAJd9"
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "from official.nlp import modeling\n",
+        "from official.nlp.modeling import layers, losses, models, networks"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "djBQWjvy-60Y"
+      },
+      "source": [
+        "## BERT pretraining model\n",
+        "\n",
+        "BERT ([Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)) introduced the method of pre-training language representations on a large text corpus and then using that model for downstream NLP tasks.\n",
+        "\n",
+        "In this section, we will learn how to build a model to pretrain BERT on the masked language modeling task and next sentence prediction task. For simplicity, we only show the minimum example and use dummy data."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MKuHVlsCHmiq"
+      },
+      "source": [
+        "### Build a `BertPretrainer` model wrapping `BertEncoder`\n",
+        "\n",
+        "The [BertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/bert_encoder.py) implements the Transformer-based encoder as described in [BERT paper](https://arxiv.org/abs/1810.04805). It includes the embedding lookups and transformer layers, but not the masked language model or classification task networks.\n",
+        "\n",
+        "The [BertPretrainer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_pretrainer.py) allows a user to pass in a transformer stack, and instantiates the masked language model and classification networks that are used to create the training objectives."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "EXkcXz-9BwB3"
+      },
+      "outputs": [],
+      "source": [
+        "# Build a small transformer network.\n",
+        "vocab_size = 100\n",
+        "sequence_length = 16\n",
+        "network = modeling.networks.BertEncoder(\n",
+        "    vocab_size=vocab_size, num_layers=2, sequence_length=16)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0NH5irV5KTMS"
+      },
+      "source": [
+        "Inspecting the encoder, we see it contains a few embedding layers and stacked `Transformer` layers, and is connected to three input layers:\n",
+        "\n",
+        "`input_word_ids`, `input_type_ids` and `input_mask`.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "lZNoZkBrIoff"
+      },
+      "outputs": [],
+      "source": [
+        "tf.keras.utils.plot_model(network, show_shapes=True, dpi=48)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "o7eFOZXiIl-b"
+      },
+      "outputs": [],
+      "source": [
+        "# Create a BERT pretrainer with the created network.\n",
+        "num_token_predictions = 8\n",
+        "bert_pretrainer = modeling.models.BertPretrainer(\n",
+        "    network, num_classes=2, num_token_predictions=num_token_predictions, output='predictions')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "d5h5HT7gNHx_"
+      },
+      "source": [
+        "Inspecting the `bert_pretrainer`, we see it wraps the `encoder` with additional `MaskedLM` and `Classification` heads."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "2tcNfm03IBF7"
+      },
+      "outputs": [],
+      "source": [
+        "tf.keras.utils.plot_model(bert_pretrainer, show_shapes=True, dpi=48)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "F2oHrXGUIS0M"
+      },
+      "outputs": [],
+      "source": [
+        "# We can feed some dummy data to get masked language model and sentence output.\n",
+        "batch_size = 2\n",
+        "word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n",
+        "mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
+        "type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
+        "masked_lm_positions_data = np.random.randint(2, size=(batch_size, num_token_predictions))\n",
+        "\n",
+        "outputs = bert_pretrainer(\n",
+        "    [word_id_data, mask_data, type_id_data, masked_lm_positions_data])\n",
+        "lm_output = outputs[\"masked_lm\"]\n",
+        "sentence_output = outputs[\"classification\"]\n",
+        "print(lm_output)\n",
+        "print(sentence_output)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bnx3UCHniCS5"
+      },
+      "source": [
+        "### Compute loss\n",
+        "Next, we can use `lm_output` and `sentence_output` to compute `loss`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "k30H4Q86f52x"
+      },
+      "outputs": [],
+      "source": [
+        "masked_lm_ids_data = np.random.randint(vocab_size, size=(batch_size, num_token_predictions))\n",
+        "masked_lm_weights_data = np.random.randint(2, size=(batch_size, num_token_predictions))\n",
+        "next_sentence_labels_data = np.random.randint(2, size=(batch_size))\n",
+        "\n",
+        "mlm_loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n",
+        "    labels=masked_lm_ids_data,\n",
+        "    predictions=lm_output,\n",
+        "    weights=masked_lm_weights_data)\n",
+        "sentence_loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n",
+        "    labels=next_sentence_labels_data,\n",
+        "    predictions=sentence_output)\n",
+        "loss = mlm_loss + sentence_loss\n",
+        "print(loss)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wrmSs8GjHxVw"
+      },
+      "source": [
+        "With the loss, you can optimize the model.\n",
+        "After training, we can save the weights of TransformerEncoder for the downstream fine-tuning tasks. Please see [run_pretraining.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_pretraining.py) for the full example.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "k8cQVFvBCV4s"
+      },
+      "source": [
+        "## Span labeling model\n",
+        "\n",
+        "Span labeling is the task of assigning labels to a span of text, for example, labeling a span of text as the answer to a given question.\n",
+        "\n",
+        "In this section, we will learn how to build a span labeling model. Again, we use dummy data for simplicity."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "xrLLEWpfknUW"
+      },
+      "source": [
+        "### Build a BertSpanLabeler wrapping BertEncoder\n",
+        "\n",
+        "[BertSpanLabeler](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_span_labeler.py) implements a simple single-span start-end predictor (that is, a model that predicts two values: a start token index and an end token index), suitable for SQuAD-style tasks.\n",
+        "\n",
+        "Note that `BertSpanLabeler` wraps a `BertEncoder`, the weights of which can be restored from the above pretraining model.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "B941M4iUCejO"
+      },
+      "outputs": [],
+      "source": [
+        "network = modeling.networks.BertEncoder(\n",
+        "        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n",
+        "\n",
+        "# Create a BERT trainer with the created network.\n",
+        "bert_span_labeler = modeling.models.BertSpanLabeler(network)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "QpB9pgj4PpMg"
+      },
+      "source": [
+        "Inspecting the `bert_span_labeler`, we see it wraps the encoder with additional `SpanLabeling` that outputs `start_position` and `end_position`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "RbqRNJCLJu4H"
+      },
+      "outputs": [],
+      "source": [
+        "tf.keras.utils.plot_model(bert_span_labeler, show_shapes=True, dpi=48)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "fUf1vRxZJwio"
+      },
+      "outputs": [],
+      "source": [
+        "# Create a set of 2-dimensional data tensors to feed into the model.\n",
+        "word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n",
+        "mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
+        "type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
+        "\n",
+        "# Feed the data to the model.\n",
+        "start_logits, end_logits = bert_span_labeler([word_id_data, mask_data, type_id_data])\n",
+        "print(start_logits)\n",
+        "print(end_logits)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WqhgQaN1lt-G"
+      },
+      "source": [
+        "### Compute loss\n",
+        "With `start_logits` and `end_logits`, we can compute loss:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "waqs6azNl3Nn"
+      },
+      "outputs": [],
+      "source": [
+        "start_positions = np.random.randint(sequence_length, size=(batch_size))\n",
+        "end_positions = np.random.randint(sequence_length, size=(batch_size))\n",
+        "\n",
+        "start_loss = tf.keras.losses.sparse_categorical_crossentropy(\n",
+        "    start_positions, start_logits, from_logits=True)\n",
+        "end_loss = tf.keras.losses.sparse_categorical_crossentropy(\n",
+        "    end_positions, end_logits, from_logits=True)\n",
+        "\n",
+        "total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2\n",
+        "print(total_loss)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Zdf03YtZmd_d"
+      },
+      "source": [
+        "With the `loss`, you can optimize the model. Please see [run_squad.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_squad.py) for the full example."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0A1XnGSTChg9"
+      },
+      "source": [
+        "## Classification model\n",
+        "\n",
+        "In the last section, we show how to build a text classification model.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MSK8OpZgnQa9"
+      },
+      "source": [
+        "### Build a BertClassifier model wrapping BertEncoder\n",
+        "\n",
+        "[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a [CLS] token classification model containing a single classification head."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cXXCsffkCphk"
+      },
+      "outputs": [],
+      "source": [
+        "network = modeling.networks.BertEncoder(\n",
+        "        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n",
+        "\n",
+        "# Create a BERT trainer with the created network.\n",
+        "num_classes = 2\n",
+        "bert_classifier = modeling.models.BertClassifier(\n",
+        "    network, num_classes=num_classes)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8tZKueKYP4bB"
+      },
+      "source": [
+        "Inspecting the `bert_classifier`, we see it wraps the `encoder` with an additional `Classification` head."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "snlutm9ZJgEZ"
+      },
+      "outputs": [],
+      "source": [
+        "tf.keras.utils.plot_model(bert_classifier, show_shapes=True, dpi=48)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "yyHPHsqBJkCz"
+      },
+      "outputs": [],
+      "source": [
+        "# Create a set of 2-dimensional data tensors to feed into the model.\n",
+        "word_id_data = np.random.randint(vocab_size, size=(batch_size, sequence_length))\n",
+        "mask_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
+        "type_id_data = np.random.randint(2, size=(batch_size, sequence_length))\n",
+        "\n",
+        "# Feed the data to the model.\n",
+        "logits = bert_classifier([word_id_data, mask_data, type_id_data])\n",
+        "print(logits)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "w--a2mg4nzKm"
+      },
+      "source": [
+        "### Compute loss\n",
+        "\n",
+        "With `logits`, we can compute `loss`:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "9X0S1DoFn_5Q"
+      },
+      "outputs": [],
+      "source": [
+        "labels = np.random.randint(num_classes, size=(batch_size))\n",
+        "\n",
+        "loss = tf.keras.losses.sparse_categorical_crossentropy(\n",
+        "    labels, logits, from_logits=True)\n",
+        "print(loss)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "mzBqOylZo3og"
+      },
+      "source": [
+        "With the `loss`, you can optimize the model. Please see [run_classifier.py](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py) or the colab [fine_tuning_bert.ipynb](https://github.com/tensorflow/models/blob/master/official/colab/fine_tuning_bert.ipynb) for the full example."
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "collapsed_sections": [],
+      "name": "Introduction to the TensorFlow Models NLP library",
+      "private_outputs": true,
+      "provenance": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/__init__.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/__init__.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/dataset_fn.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/dataset_fn.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility library for picking an appropriate dataset function."""
+
+from typing import Any, Callable, Union, Type
+
+import tensorflow as tf
+
+PossibleDatasetType = Union[Type[tf.data.Dataset], Callable[[tf.Tensor], Any]]
+
+
+def pick_dataset_fn(file_type: str) -> PossibleDatasetType:
+  """Returns the dataset constructor that matches `file_type`.
+
+  Args:
+    file_type: name of the input file format. Only 'tfrecord' is handled.
+
+  Returns:
+    `tf.data.TFRecordDataset` when `file_type` is 'tfrecord'.
+
+  Raises:
+    ValueError: if `file_type` is any other value.
+  """
+  # Reject unknown formats up front so the supported case reads as the
+  # straight-line path.
+  if file_type != 'tfrecord':
+    raise ValueError('Unrecognized file_type: {}'.format(file_type))
+  return tf.data.TFRecordDataset
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/distribute_utils.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/distribute_utils.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helper functions for running models in a distributed setting."""
+
+import json
+import os
+import tensorflow as tf
+
+
+def _collective_communication(all_reduce_alg):
+  """Maps an all-reduce algorithm name to a CollectiveCommunication value.
+
+  Args:
+    all_reduce_alg: None, "ring" or "nccl". None selects the AUTO member.
+
+  Returns:
+    The matching member of
+    tf.distribute.experimental.CollectiveCommunication.
+
+  Raises:
+    ValueError: if `all_reduce_alg` is not one of None, "ring" or "nccl".
+  """
+  communication = tf.distribute.experimental.CollectiveCommunication
+  by_name = {
+      None: communication.AUTO,
+      "ring": communication.RING,
+      "nccl": communication.NCCL,
+  }
+  if all_reduce_alg in by_name:
+    return by_name[all_reduce_alg]
+  raise ValueError(
+      "When used with `multi_worker_mirrored`, valid values for "
+      "all_reduce_alg are [`ring`, `nccl`].  Supplied value: {}".format(
+          all_reduce_alg))
+
+
+def _mirrored_cross_device_ops(all_reduce_alg, num_packs):
+  """Builds the CrossDeviceOps instance named by `all_reduce_alg`.
+
+  Args:
+    all_reduce_alg: None, "nccl" or "hierarchical_copy".
+    num_packs: integer forwarded as `num_packs` to the chosen class.
+
+  Returns:
+    None when `all_reduce_alg` is None; otherwise an instance of
+    tf.distribute.NcclAllReduce or tf.distribute.HierarchicalCopyAllReduce.
+
+  Raises:
+    ValueError: if `all_reduce_alg` is not None, "nccl" or
+      "hierarchical_copy".
+  """
+  # Nothing requested: return before TensorFlow is touched at all.
+  if all_reduce_alg is None:
+    return None
+
+  ops_by_name = {
+      "nccl": tf.distribute.NcclAllReduce,
+      "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce,
+  }
+  if all_reduce_alg in ops_by_name:
+    return ops_by_name[all_reduce_alg](num_packs=num_packs)
+  raise ValueError(
+      "When used with `mirrored`, valid values for all_reduce_alg are "
+      "[`nccl`, `hierarchical_copy`].  Supplied value: {}".format(
+          all_reduce_alg))
+
+
+def tpu_initialize(tpu_address):
+  """Creates a TPU cluster resolver and initializes the TPU system.
+
+  Args:
+    tpu_address: string, bns address of master TPU worker.
+
+  Returns:
+    The TPUClusterResolver built from `tpu_address`.
+  """
+  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
+      tpu=tpu_address)
+  # The empty string and "local" skip the explicit cluster connection; any
+  # other address is connected to before initialization.
+  needs_cluster_connection = tpu_address not in ("", "local")
+  if needs_cluster_connection:
+    tf.config.experimental_connect_to_cluster(resolver)
+  tf.tpu.experimental.initialize_tpu_system(resolver)
+  return resolver
+
+
+def get_distribution_strategy(distribution_strategy="mirrored",
+                              num_gpus=0,
+                              all_reduce_alg=None,
+                              num_packs=1,
+                              tpu_address=None,
+                              **kwargs):
+  """Builds the tf.distribute strategy selected by name.
+
+  Args:
+    distribution_strategy: a string naming the strategy, matched
+      case-insensitively. Accepted values are "off", "one_device",
+      "mirrored", "parameter_server", "multi_worker_mirrored" and "tpu".
+      "off" returns whatever tf.distribute.get_strategy() yields (see
+      https://www.tensorflow.org/guide/distributed_training#default_strategy).
+      "tpu" builds a TPUStrategy from `tpu_address`.
+    num_gpus: number of GPUs to run the model on. 0 selects the CPU device
+      for "one_device" and "mirrored".
+    all_reduce_alg: Optional. All-reduce algorithm name. For "mirrored" the
+      valid values are "nccl" and "hierarchical_copy"; for
+      "multi_worker_mirrored" they are "ring" and "nccl". None leaves the
+      choice to the strategy.
+    num_packs: Optional. `num_packs` passed to the cross device op that is
+      built for "mirrored".
+    tpu_address: Optional. String naming the TPU to connect to; it is
+      forwarded to `tpu_initialize` for the "tpu" strategy and is not
+      validated here.
+    **kwargs: Additional kwargs for internal usages; they are discarded.
+
+  Returns:
+    A tf.distribute.Strategy object.
+
+  Raises:
+    ValueError: if `num_gpus` is negative; if `distribution_strategy` is not
+      a string or is not one of the accepted names; if it is "off" or
+      "one_device" while `num_gpus` is larger than 1; or if `all_reduce_alg`
+      is not valid for the selected strategy.
+  """
+  del kwargs  # Accepted only so that callers may pass extras.
+  if num_gpus < 0:
+    raise ValueError("`num_gpus` can not be negative.")
+
+  if not isinstance(distribution_strategy, str):
+    complaint = ("distribution_strategy must be a string but got: %s." %
+                 (distribution_strategy,))
+    # An unquoted `off` in yaml parses as the bool False, so that case gets
+    # an extra hint appended.
+    if distribution_strategy == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
+      complaint += (" If you meant to pass the string 'off', make sure you add "
+                    "quotes around 'off' so that yaml interprets it as a string "
+                    "instead of a bool.")
+    raise ValueError(complaint)
+
+  strategy_name = distribution_strategy.lower()
+
+  if strategy_name == "off":
+    if num_gpus > 1:
+      raise ValueError(f"When {num_gpus} GPUs are specified, "
+                       "distribution_strategy flag cannot be set to `off`.")
+    # "off" means: hand back the default strategy.
+    return tf.distribute.get_strategy()
+
+  if strategy_name == "tpu":
+    # An empty `tpu_address` means local TPUs are used.
+    resolver = tpu_initialize(tpu_address)
+    return tf.distribute.TPUStrategy(resolver)
+
+  if strategy_name == "multi_worker_mirrored":
+    return tf.distribute.experimental.MultiWorkerMirroredStrategy(
+        communication=_collective_communication(all_reduce_alg))
+
+  if strategy_name == "one_device":
+    if num_gpus > 1:
+      raise ValueError("`OneDeviceStrategy` can not be used for more than "
+                       "one device.")
+    device_name = "device:CPU:0" if num_gpus == 0 else "device:GPU:0"
+    return tf.distribute.OneDeviceStrategy(device_name)
+
+  if strategy_name == "mirrored":
+    devices = (["device:CPU:0"] if num_gpus == 0 else
+               ["device:GPU:%d" % gpu_id for gpu_id in range(num_gpus)])
+    return tf.distribute.MirroredStrategy(
+        devices=devices,
+        cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs))
+
+  if strategy_name == "parameter_server":
+    resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
+    return tf.distribute.experimental.ParameterServerStrategy(resolver)
+
+  raise ValueError("Unrecognized Distribution Strategy: %r" %
+                   strategy_name)
+
+
+def configure_cluster(worker_hosts=None, task_index=-1):
+  """Sets the multi-worker cluster spec in the TF_CONFIG environment variable.
+
+  An existing, non-empty TF_CONFIG takes precedence: it is left untouched and
+  only its chief and worker entries are counted. Otherwise TF_CONFIG is
+  written from `worker_hosts`, if that is given.
+
+  Args:
+    worker_hosts: comma-separated list of worker ip:port pairs.
+    task_index: index of the worker. It may be left at its default when there
+      is only one worker, in which case index 0 is written.
+
+  Returns:
+    Number of workers in the cluster (1 when nothing is configured).
+
+  Raises:
+    ValueError: if several workers are listed but `task_index` is negative.
+  """
+  existing_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
+  if existing_config:
+    cluster_spec = existing_config["cluster"]
+    return (len(cluster_spec.get("chief", [])) +
+            len(cluster_spec.get("worker", [])))
+
+  if not worker_hosts:
+    return 1
+
+  host_list = worker_hosts.split(",")
+  worker_count = len(host_list)
+  if worker_count == 1:
+    # A lone worker is always task 0, whatever the caller passed.
+    task_index = 0
+  elif task_index < 0:
+    raise ValueError("Must specify task_index when number of workers > 1")
+
+  os.environ["TF_CONFIG"] = json.dumps({
+      "cluster": {
+          "worker": host_list
+      },
+      "task": {
+          "type": "worker",
+          "index": task_index
+      }
+  })
+  return worker_count
+
+
+def get_strategy_scope(strategy):
+  """Returns a context manager to run code under `strategy`.
+
+  Args:
+    strategy: an object exposing `scope()`, or a falsy value such as None.
+
+  Returns:
+    `strategy.scope()` when `strategy` is truthy, otherwise a fresh
+    DummyContextManager.
+  """
+  return strategy.scope() if strategy else DummyContextManager()
+
+
+class DummyContextManager(object):
+  """Context manager that does nothing on entry or exit."""
+
+  def __enter__(self):
+    """Enters the block; the `as` target is bound to None."""
+    return None
+
+  def __exit__(self, *args):
+    """Leaves the block without suppressing any exception."""
+    return None
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/distribute_utils_test.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/distribute_utils_test.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for distribution util functions."""
+
+import tensorflow as tf
+
+from official.common import distribute_utils
+
+
+class DistributeUtilsTest(tf.test.TestCase):
+  """Tests for distribute util functions."""
+
+  def test_invalid_args(self):
+    """Invalid argument combinations raise ValueError."""
+    with self.assertRaisesRegex(ValueError, '`num_gpus` can not be negative.'):
+      _ = distribute_utils.get_distribution_strategy(num_gpus=-1)
+
+    with self.assertRaisesRegex(ValueError,
+                                '.*If you meant to pass the string .*'):
+      _ = distribute_utils.get_distribution_strategy(
+          distribution_strategy=False, num_gpus=0)
+    with self.assertRaisesRegex(ValueError, 'When 2 GPUs are specified.*'):
+      _ = distribute_utils.get_distribution_strategy(
+          distribution_strategy='off', num_gpus=2)
+    with self.assertRaisesRegex(ValueError,
+                                '`OneDeviceStrategy` can not be used.*'):
+      _ = distribute_utils.get_distribution_strategy(
+          distribution_strategy='one_device', num_gpus=2)
+
+  # NOTE: `assertEqual` replaces the deprecated alias `assertEquals`, which
+  # was removed from unittest in Python 3.12.
+  def test_one_device_strategy_cpu(self):
+    """'one_device' with no GPUs yields a single CPU replica."""
+    ds = distribute_utils.get_distribution_strategy('one_device', num_gpus=0)
+    self.assertEqual(ds.num_replicas_in_sync, 1)
+    self.assertEqual(len(ds.extended.worker_devices), 1)
+    self.assertIn('CPU', ds.extended.worker_devices[0])
+
+  def test_one_device_strategy_gpu(self):
+    """'one_device' with one GPU yields a single GPU replica."""
+    ds = distribute_utils.get_distribution_strategy('one_device', num_gpus=1)
+    self.assertEqual(ds.num_replicas_in_sync, 1)
+    self.assertEqual(len(ds.extended.worker_devices), 1)
+    self.assertIn('GPU', ds.extended.worker_devices[0])
+
+  def test_mirrored_strategy(self):
+    """The default strategy mirrors across GPUs and validates all_reduce_alg."""
+    ds = distribute_utils.get_distribution_strategy(num_gpus=5)
+    self.assertEqual(ds.num_replicas_in_sync, 5)
+    self.assertEqual(len(ds.extended.worker_devices), 5)
+    for device in ds.extended.worker_devices:
+      self.assertIn('GPU', device)
+
+    _ = distribute_utils.get_distribution_strategy(
+        distribution_strategy='mirrored',
+        num_gpus=2,
+        all_reduce_alg='nccl',
+        num_packs=2)
+    with self.assertRaisesRegex(
+        ValueError,
+        'When used with `mirrored`, valid values for all_reduce_alg are.*'):
+      _ = distribute_utils.get_distribution_strategy(
+          distribution_strategy='mirrored',
+          num_gpus=2,
+          all_reduce_alg='dummy',
+          num_packs=2)
+
+  def test_mwms(self):
+    """'multi_worker_mirrored' returns a MultiWorkerMirroredStrategy."""
+    distribute_utils.configure_cluster(worker_hosts=None, task_index=-1)
+    ds = distribute_utils.get_distribution_strategy(
+        'multi_worker_mirrored', all_reduce_alg='nccl')
+    self.assertIsInstance(
+        ds, tf.distribute.experimental.MultiWorkerMirroredStrategy)
+
+  def test_no_strategy(self):
+    """'off' returns the strategy from tf.distribute.get_strategy()."""
+    ds = distribute_utils.get_distribution_strategy('off')
+    self.assertIs(ds, tf.distribute.get_strategy())
+
+  # NOTE: `assertRaisesRegex` replaces the deprecated alias
+  # `assertRaisesRegexp` (removed in Python 3.12) and matches the spelling
+  # already used by the other tests in this class.
+  def test_invalid_strategy(self):
+    """Non-string strategy values are rejected with a descriptive message."""
+    with self.assertRaisesRegex(
+        ValueError,
+        'distribution_strategy must be a string but got: False. If'):
+      distribute_utils.get_distribution_strategy(False)
+    with self.assertRaisesRegex(
+        ValueError, 'distribution_strategy must be a string but got: 1'):
+      distribute_utils.get_distribution_strategy(1)
+
+  def test_get_strategy_scope(self):
+    """A real strategy becomes current inside its scope; None does not."""
+    ds = distribute_utils.get_distribution_strategy('one_device', num_gpus=0)
+    with distribute_utils.get_strategy_scope(ds):
+      self.assertIs(tf.distribute.get_strategy(), ds)
+    with distribute_utils.get_strategy_scope(None):
+      self.assertIsNot(tf.distribute.get_strategy(), ds)
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/flags.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/flags.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The central place to define flags."""
+
+from absl import flags
+
+
+def define_flags():
+  """Defines flags.
+
+  All flags are defined as optional, but in practice most models use some of
+  these flags and so mark_flags_as_required() should be called after calling
+  this function. Typically, 'experiment', 'mode', and 'model_dir' are required.
+  For example:
+
+  ```
+  from absl import flags
+  from official.common import flags as tfm_flags  # pylint: disable=line-too-long
+  ...
+  tfm_flags.define_flags()
+  flags.mark_flags_as_required(['experiment', 'mode', 'model_dir'])
+  ```
+
+  The reason all flags are optional is because unit tests often do not set or
+  use any of the flags.
+
+  The flags defined are: `experiment`, `mode`, `model_dir`, `config_file`,
+  `params_override`, `gin_file`, `gin_params`, `tpu` and `tf_data_service`.
+  All of them default to None.
+  """
+  flags.DEFINE_string(
+      'experiment',
+      default=None,
+      help='The experiment type registered, specifying an ExperimentConfig.')
+
+  flags.DEFINE_enum(
+      'mode',
+      default=None,
+      enum_values=[
+          'train', 'eval', 'train_and_eval', 'continuous_eval',
+          'continuous_train_and_eval', 'train_and_validate'
+      ],
+      help='Mode to run: `train`, `eval`, `train_and_eval`, '
+      '`continuous_eval`, `continuous_train_and_eval` and '
+      '`train_and_validate` (which is not implemented in '
+      'the open source version).')
+
+  # The trailing space after "summaries" keeps the two concatenated literals
+  # from running together as "summariesare" in the rendered help text.
+  flags.DEFINE_string(
+      'model_dir',
+      default=None,
+      help='The directory where the model and training/evaluation summaries '
+      'are stored.')
+
+  flags.DEFINE_multi_string(
+      'config_file',
+      default=None,
+      help='YAML/JSON files which specifies overrides. The override order '
+      'follows the order of args. Note that each file '
+      'can be used as an override template to override the default parameters '
+      'specified in Python. If the same parameter is specified in both '
+      '`--config_file` and `--params_override`, `config_file` will be used '
+      'first, followed by params_override.')
+
+  flags.DEFINE_string(
+      'params_override',
+      default=None,
+      help='a YAML/JSON string or a YAML file which specifies additional '
+      'overrides over the default parameters and those specified in '
+      '`--config_file`. Note that this is supposed to be used only to override '
+      'the model parameters, but not the parameters like TPU specific flags. '
+      'One canonical use case of `--config_file` and `--params_override` is '
+      'users first define a template config file using `--config_file`, then '
+      'use `--params_override` to adjust the minimal set of tuning parameters, '
+      'for example setting up different `train_batch_size`. The final override '
+      'order of parameters: default_model_params --> params from config_file '
+      '--> params in params_override. See also the help message of '
+      '`--config_file`.')
+
+  # Libraries that rely on gin often define these flags inside their own
+  # files, which causes conflicts. A duplicate definition is therefore
+  # tolerated and the existing flag is kept.
+  try:
+    flags.DEFINE_multi_string(
+        'gin_file', default=None, help='List of paths to the config files.')
+  except flags.DuplicateFlagError:
+    pass
+
+  try:
+    flags.DEFINE_multi_string(
+        'gin_params',
+        default=None,
+        help='Newline separated list of Gin parameter bindings.')
+  except flags.DuplicateFlagError:
+    pass
+
+  flags.DEFINE_string(
+      'tpu',
+      default=None,
+      help='The Cloud TPU to use for training. This should be either the name '
+      'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 '
+      'url.')
+
+  flags.DEFINE_string(
+      'tf_data_service', default=None, help='The tf.data service address')
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/registry_imports.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/registry_imports.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""All necessary imports for registration."""
+# pylint: disable=unused-import
+from official.nlp import tasks
+from official.nlp.configs import experiment_configs
+from official.utils.testing import mock_task
+from official.vision import beta
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/common/streamz_counters.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/common/streamz_counters.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Global streamz counters."""
+
+from tensorflow.python.eager import monitoring
+
+
+# Module-level counter; per its description string it counts
+# ProgressivePolicy creations.
+progressive_policy_creation_counter = monitoring.Counter(
+    "/tensorflow/training/fast_training/progressive_policy_creation",
+    "Counter for the number of ProgressivePolicy creations.")
+
+
+# Module-level counter; per its description string it counts low-level
+# stacking API calls.
+# NOTE(review): the metric path ends in `tf_vars_to_vars` while the variable is
+# named `stack_vars_to_vars_call_counter` -- confirm the path is intended.
+stack_vars_to_vars_call_counter = monitoring.Counter(
+    "/tensorflow/training/fast_training/tf_vars_to_vars",
+    "Counter for the number of low-level stacking API calls.")
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/core/__init__.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/core/__init__.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/core/actions.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/core/actions.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Provides TFM orbit actions and associated helper functions/classes."""
+
+import os
+from typing import List
+from absl import logging
+
+import gin
+import orbit
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+from official.core import base_trainer
+from official.core import config_definitions
+from official.modeling import optimization
+
+
+class PruningActions:
+  """Train action that advances pruning steps and logs pruning summaries.
+
+  On construction a `tfmot` `UpdatePruningStep` callback and a
+  `PruningSummaries` callback are bound to the model. Every invocation then
+  forwards to `UpdatePruningStep.on_epoch_end` and
+  `PruningSummaries.on_epoch_begin`.
+
+  This action must be used when training a pruned model to avoid pruning error.
+  """
+
+  def __init__(
+      self,
+      export_dir: str,
+      model: tf.keras.Model,
+      optimizer: tf.keras.optimizers.Optimizer,
+  ):
+    """Builds the pruning callbacks and binds them to `model`.
+
+    Args:
+      export_dir: `str` directory handed to `PruningSummaries` as `log_dir`.
+      model: `tf.keras.Model` used for training; both callbacks are bound to
+        it via `set_model`.
+      optimizer: `tf.keras.optimizers.Optimizer` used for training. It is kept
+        on this action and also assigned to `model.optimizer`.
+    """
+    step_updater = tfmot.sparsity.keras.UpdatePruningStep()
+    step_updater.set_model(model)
+    step_updater.on_train_begin()
+
+    summaries = tfmot.sparsity.keras.PruningSummaries(log_dir=export_dir)
+    # The optimizer is attached to the model before the summaries callback is
+    # bound to it.
+    model.optimizer = optimizer
+    summaries.set_model(model)
+
+    self._optimizer = optimizer
+    self.update_pruning_step = step_updater
+    self.pruning_summaries = summaries
+
+  def __call__(self, output: orbit.runner.Output):
+    """Advances the pruning step, then emits the pruning summaries.
+
+    Args:
+      output: The train output. It is not read by this action.
+    """
+    self.update_pruning_step.on_epoch_end(batch=None)
+    self.pruning_summaries.on_epoch_begin(epoch=None)
+
+
+class EMACheckpointing:
+  """Eval action to save checkpoint with average weights when EMA is used.
+
+  This action swaps the weights of the model with the average weights, then it
+  saves the checkpoint under export_dir/ema_checkpoints. Checkpointing is
+  expensive for large models, so doing this action in eval is more efficient
+  than training.
+  """
+
+  def __init__(self, export_dir: str, optimizer: tf.keras.optimizers.Optimizer,
+               checkpoint: tf.train.Checkpoint, max_to_keep: int = 1):
+    """Initializes the instance.
+
+    Args:
+      export_dir: `str` for the export directory of the EMA average weights.
+        Checkpoints are written to its `ema_checkpoints` subdirectory.
+      optimizer: `tf.keras.optimizers.Optimizer` optimizer instance used for
+        training. This will be used to swap the model weights with the average
+        weights.
+      checkpoint: `tf.train.Checkpoint` instance.
+      max_to_keep: `int` for max checkpoints to keep in ema_checkpoints subdir.
+
+    Raises:
+      ValueError: If `optimizer` is not an
+        `optimization.ExponentialMovingAverage` instance.
+    """
+    if not isinstance(optimizer, optimization.ExponentialMovingAverage):
+      # Adjacent literals are concatenated verbatim, so each fragment needs its
+      # own trailing space to keep the message readable.
+      raise ValueError('Optimizer has to be instance of '
+                       'optimization.ExponentialMovingAverage for '
+                       'EMACheckpointing action')
+
+    export_dir = os.path.join(export_dir, 'ema_checkpoints')
+    # `os.path.dirname` yields the parent of the `ema_checkpoints` path, so
+    # this call creates the parent directory only.
+    tf.io.gfile.makedirs(
+        os.path.dirname(export_dir))
+    self._optimizer = optimizer
+    self._checkpoint = checkpoint
+    self._checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        directory=export_dir,
+        max_to_keep=max_to_keep,
+        checkpoint_name='average_weights')
+
+  def __call__(self, output: orbit.runner.Output):
+    """Swaps model weights, and saves the checkpoint.
+
+    Args:
+      output: The train or eval output. It is not read by this action.
+    """
+    self._optimizer.swap_weights()
+    try:
+      self._checkpoint_manager.save(
+          checkpoint_number=self._optimizer.iterations)
+    finally:
+      # Always swap back, so a failed save does not leave the model holding
+      # the averaged weights.
+      self._optimizer.swap_weights()
+
+
+class RecoveryAction:
+  """Train action that restores training state through a checkpoint manager.
+
+  Meant to run after a loss blowup has been detected: when invoked, it asks
+  the checkpoint manager to restore (or initialize) and logs the path that the
+  manager returns.
+  """
+
+  def __init__(self, checkpoint_manager: tf.train.CheckpointManager):
+    """Stores the checkpoint manager used for restoration.
+
+    Args:
+      checkpoint_manager: `tf.train.CheckpointManager` whose
+        `restore_or_initialize` is called on every invocation.
+    """
+    self.checkpoint_manager = checkpoint_manager
+
+  def __call__(self, _):
+    """Triggers checkpoint restoration and logs where it restored from."""
+    restored_from = self.checkpoint_manager.restore_or_initialize()
+    logging.warning('Recovering the model from checkpoint: %s.',
+                    restored_from)
+
+
+class RecoveryCondition:
+  """Condition deciding whether training should be recovered.
+
+  Calling the instance inspects `outputs['training_loss']`. It returns True
+  when the loss is NaN, or when the global step has reached
+  `recovery_begin_steps` and the loss exceeds `loss_upper_bound`. Every such
+  hit increments `recover_counter`; once the counter exceeds
+  `recovery_max_trials` a `RuntimeError` is raised instead.
+  """
+
+  def __init__(self,
+               global_step: tf.Variable,
+               loss_upper_bound: float,
+               recovery_begin_steps: int = 0,
+               recovery_max_trials: int = 3):
+    """Stores the thresholds and resets the recovery counter.
+
+    Args:
+      global_step: `tf.Variable` compared against `recovery_begin_steps`.
+      loss_upper_bound: Loss values above this bound count as a blowup.
+      recovery_begin_steps: The upper-bound check only applies once
+        `global_step` is at least this value; the NaN check always applies.
+      recovery_max_trials: Number of hits tolerated before raising.
+    """
+    self.global_step = global_step
+    self.loss_upper_bound = loss_upper_bound
+    self.recovery_begin_steps = recovery_begin_steps
+    self.recovery_max_trials = recovery_max_trials
+    self.recover_counter = 0
+
+  def __call__(self, outputs: orbit.runner.Output):
+    """Returns True if recovery is needed for the given train outputs.
+
+    Args:
+      outputs: Train outputs; must contain the key `'training_loss'`.
+
+    Returns:
+      True when the loss is NaN or above the bound, False otherwise.
+
+    Raises:
+      RuntimeError: If the number of hits exceeds `recovery_max_trials`.
+    """
+    loss_value = outputs['training_loss']
+
+    if tf.math.is_nan(loss_value):
+      self.recover_counter += 1
+      if self.recover_counter <= self.recovery_max_trials:
+        return True
+      raise RuntimeError(
+          'The loss value is NaN after training loop and it happens %d times.'
+          % self.recover_counter)
+
+    if (self.global_step >= self.recovery_begin_steps and
+        loss_value > self.loss_upper_bound):
+      self.recover_counter += 1
+      if self.recover_counter <= self.recovery_max_trials:
+        return True
+      raise RuntimeError(
+          f'The loss value is {loss_value}, which is larger than the bound '
+          f'{self.loss_upper_bound}, happens {self.recover_counter} times.')
+
+    return False
+
+
+@gin.configurable
+def get_eval_actions(
+    params: config_definitions.ExperimentConfig,
+    trainer: base_trainer.Trainer,
+    model_dir: str) -> List[orbit.Action]:
+  """Gets eval actions for TFM trainer.
+
+  Args:
+    params: Experiment config; `params.trainer.max_to_keep` bounds the number
+      of EMA checkpoints kept.
+    trainer: Trainer providing the `optimizer` and `checkpoint`.
+    model_dir: Directory under which the `ema_checkpoints` subdir is written.
+
+  Returns:
+    A list holding one `EMACheckpointing` action when the trainer's optimizer
+    is an `optimization.ExponentialMovingAverage`, otherwise an empty list.
+  """
+  if not isinstance(trainer.optimizer, optimization.ExponentialMovingAverage):
+    return []
+
+  # Saves the average weights under the ema_checkpoints subdir.
+  ema_action = EMACheckpointing(
+      export_dir=model_dir,
+      optimizer=trainer.optimizer,
+      checkpoint=trainer.checkpoint,
+      max_to_keep=params.trainer.max_to_keep)
+  return [ema_action]
+
+
+@gin.configurable
+def get_train_actions(
+    params: config_definitions.ExperimentConfig, trainer: base_trainer.Trainer,
+    model_dir: str,
+    checkpoint_manager: tf.train.CheckpointManager) -> List[orbit.Action]:
+  """Gets train actions for TFM trainer.
+
+  Args:
+    params: Experiment config. `params.task.pruning` (if present and not None)
+      enables the pruning action; the `params.trainer` recovery fields
+      configure the recovery action.
+    trainer: Trainer providing the `model`, `optimizer` and `global_step`.
+    model_dir: Directory used as export dir for the pruning summaries.
+    checkpoint_manager: Manager the recovery action restores from.
+
+  Returns:
+    The list of train actions, possibly empty.
+  """
+  train_actions = []
+  # Adds pruning callback actions. `hasattr` alone is also true for a task
+  # config that declares a `pruning` field but leaves it unset (None), so only
+  # add the action when a pruning config is actually present.
+  if getattr(params.task, 'pruning', None) is not None:
+    train_actions.append(
+        PruningActions(
+            export_dir=model_dir,
+            model=trainer.model,
+            optimizer=trainer.optimizer))
+
+  # A negative `recovery_max_trials` disables loss-blowup recovery.
+  if params.trainer.recovery_max_trials >= 0:
+    recovery_condition = RecoveryCondition(
+        global_step=trainer.global_step,
+        loss_upper_bound=params.trainer.loss_upper_bound,
+        recovery_begin_steps=params.trainer.recovery_begin_steps,
+        recovery_max_trials=params.trainer.recovery_max_trials,
+    )
+    recover_action = orbit.actions.ConditionalAction(
+        condition=recovery_condition,
+        action=RecoveryAction(checkpoint_manager),
+    )
+    train_actions.append(recover_action)
+  return train_actions
--- a/TensorFlow2x/ComputeVision/Classification/models-master/official/core/actions_test.py
+++ b/TensorFlow2x/ComputeVision/Classification/models-master/official/core/actions_test.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for TFM actions."""
+
+import os
+
+from absl.testing import parameterized
+import numpy as np
+import orbit
+import tensorflow as tf
+
+from tensorflow.python.distribute import combinations
+from tensorflow.python.distribute import strategy_combinations
+from official.core import actions
+from official.modeling import optimization
+
+
+class TestModel(tf.Module):
+  """Minimal `tf.Module` holding a single variable, used by the tests below."""
+
+  def __init__(self):
+    # Variable initialised to 0; the tests assign to it and read it back.
+    self.value = tf.Variable(0)
+
+  @tf.function(input_signature=[])
+  def __call__(self):
+    """Returns `self.value`."""
+    return self.value
+
+
+class ActionsTest(tf.test.TestCase, parameterized.TestCase):
+  """Tests for `EMACheckpointing` and `RecoveryCondition`."""
+
+  def _assert_recovers_until_exhausted(self, loss):
+    """Checks that two hits return True and a third raises `RuntimeError`."""
+    global_step = orbit.utils.create_global_step()
+    condition = actions.RecoveryCondition(
+        global_step, loss_upper_bound=0.5, recovery_max_trials=2)
+    outputs = {'training_loss': loss}
+    for _ in range(2):
+      self.assertTrue(condition(outputs))
+    with self.assertRaises(RuntimeError):
+      condition(outputs)
+
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              strategy_combinations.cloud_tpu_strategy,
+              strategy_combinations.one_device_strategy_gpu,
+          ],))
+  def test_ema_checkpointing(self, distribution):
+    """The EMA action must checkpoint the average weights, not the live ones."""
+    with distribution.scope():
+      directory = self.create_tempdir()
+      ema_dir = os.path.join(directory, 'ema_checkpoints')
+      model = TestModel()
+      optimizer = optimization.ExponentialMovingAverage(
+          tf.keras.optimizers.SGD(), trainable_weights_only=False)
+
+      # Creates average weights for the model variables. Average weights are
+      # initialized to zero.
+      optimizer.shadow_copy(model)
+      checkpoint = tf.train.Checkpoint(model=model)
+
+      # Live value becomes 3 while the average value is still 0.
+      model.value.assign(3)
+      self.assertEqual(model(), 3)
+
+      ema_action = actions.EMACheckpointing(directory, optimizer, checkpoint)
+      ema_action({})
+      self.assertNotEmpty(tf.io.gfile.glob(ema_dir))
+
+      # Reading the saved checkpoint back must yield the averaged value (0).
+      checkpoint.read(tf.train.latest_checkpoint(ema_dir))
+      self.assertEqual(model(), 0)
+
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              strategy_combinations.default_strategy,
+              strategy_combinations.cloud_tpu_strategy,
+              strategy_combinations.one_device_strategy_gpu,
+          ],))
+  def test_recovery_condition(self, distribution):
+    """Both an over-bound loss and a NaN loss exhaust the trial budget."""
+    with distribution.scope():
+      # Loss above the upper bound of 0.5.
+      self._assert_recovers_until_exhausted(0.6)
+      # NaN loss.
+      self._assert_recovers_until_exhausted(
+          tf.constant([np.nan], tf.float32))
+
+
+# Runs the tests via `tf.test.main()` when this file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()