Unverified Commit 965cc3ee authored by Ayushman Kumar, committed by GitHub

Merge pull request #7 from tensorflow/master

updated
parents 1f3247f4 1f685c54
@@ -22,7 +22,6 @@ import functools
 from absl.testing import parameterized
 import tensorflow as tf
-import tensorflow_datasets as tfds
 from tensorflow.python.distribute import combinations
 from tensorflow.python.distribute import strategy_combinations
@@ -88,5 +87,4 @@ class KerasMnistTest(tf.test.TestCase, parameterized.TestCase):
 if __name__ == "__main__":
-  tf.compat.v1.enable_v2_behavior()
   tf.test.main()
@@ -19,13 +19,233 @@ from __future__ import division
 from __future__ import print_function
 from absl import logging
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 import tensorflow_addons as tfa
-from typing import Any, Dict, Text
+from typing import Any, Dict, Text, List
 from official.vision.image_classification import learning_rate
 from official.vision.image_classification.configs import base_configs
# pylint: disable=protected-access
class MovingAverage(tf.keras.optimizers.Optimizer):
  """Optimizer that computes a moving average of the variables.

  Empirically it has been found that using the moving average of the trained
  parameters of a deep network is better than using its trained parameters
  directly. This optimizer allows you to compute this moving average and swap
  the variables at save time so that any code outside of the training loop
  will use by default the average values instead of the original ones.

  Example of usage for training:
  ```python
  opt = tf.keras.optimizers.SGD(learning_rate)
  opt = MovingAverage(opt)
  opt.shadow_copy(model)
  ```

  At test time, swap the shadow variables to evaluate on the averaged weights:
  ```python
  opt.swap_weights()
  # Test eval the model here
  opt.swap_weights()
  ```
  """

  def __init__(self,
               optimizer: tf.keras.optimizers.Optimizer,
               average_decay: float = 0.99,
               start_step: int = 0,
               dynamic_decay: bool = True,
               name: Text = 'moving_average',
               **kwargs):
    """Construct a new MovingAverage optimizer.

    Args:
      optimizer: `tf.keras.optimizers.Optimizer` that will be
        used to compute and apply gradients.
      average_decay: float. Decay to use to maintain the moving averages
        of trained variables.
      start_step: int. What step to start the moving average.
      dynamic_decay: bool. Whether to change the decay based on the number
        of optimizer updates. Decay will start at 0.1 and gradually increase
        up to `average_decay` after each optimizer update. This behavior is
        similar to `tf.train.ExponentialMovingAverage` in TF 1.x.
      name: Optional name for the operations created when applying
        gradients. Defaults to "moving_average".
      **kwargs: keyword arguments. Allowed to be {`clipnorm`,
        `clipvalue`, `lr`, `decay`}.
    """
    super(MovingAverage, self).__init__(name, **kwargs)
    self._optimizer = optimizer
    self._average_decay = average_decay
    self._start_step = tf.constant(start_step, tf.float32)
    self._dynamic_decay = dynamic_decay
    # Populated by shadow_copy(); initializing to None here lets
    # has_shadow_copy be queried before shadow_copy() is called.
    self._average_weights = None
    self._model_weights = None
  def shadow_copy(self, model: tf.keras.Model):
    """Creates shadow variables for the given model weights."""
    for var in model.weights:
      self.add_slot(var, 'average', initializer='zeros')
    self._average_weights = [
        self.get_slot(var, 'average') for var in model.weights
    ]
    self._model_weights = model.weights

  @property
  def has_shadow_copy(self):
    """Whether this optimizer has created shadow variables."""
    return self._model_weights is not None

  def _create_slots(self, var_list):
    self._optimizer._create_slots(var_list=var_list)  # pylint: disable=protected-access

  def apply_gradients(self, grads_and_vars, name: Text = None):
    result = self._optimizer.apply_gradients(grads_and_vars, name)
    self.update_average(self._optimizer.iterations)
    return result

  @tf.function
  def update_average(self, step: tf.Tensor):
    step = tf.cast(step, tf.float32)
    if step < self._start_step:
      decay = tf.constant(0., tf.float32)
    elif self._dynamic_decay:
      decay = step - self._start_step
      decay = tf.minimum(self._average_decay, (1. + decay) / (10. + decay))
    else:
      decay = self._average_decay

    def _apply_moving(v_moving, v_normal):
      diff = v_moving - v_normal
      v_moving.assign_sub(tf.cast(1. - decay, v_moving.dtype) * diff)
      return v_moving

    def _update(strategy, v_moving_and_v_normal):
      for v_moving, v_normal in v_moving_and_v_normal:
        strategy.extended.update(v_moving, _apply_moving, args=(v_normal,))

    ctx = tf.distribute.get_replica_context()
    return ctx.merge_call(_update, args=(zip(self._average_weights,
                                             self._model_weights),))

  def swap_weights(self):
    """Swap the average and moving weights.

    This is a convenience method to allow one to evaluate the averaged weights
    at test time. Loads the weights stored in `self._average` into the model,
    keeping a copy of the original model weights. Swapping twice will return
    the original weights.
    """
    if tf.distribute.in_cross_replica_context():
      strategy = tf.distribute.get_strategy()
      strategy.run(self._swap_weights, args=())
    else:
      raise ValueError('Swapping weights must occur under a '
                       'tf.distribute.Strategy')

  @tf.function
  def _swap_weights(self):
    def fn_0(a, b):
      a.assign_add(b)
      return a

    def fn_1(b, a):
      b.assign(a - b)
      return b

    def fn_2(a, b):
      a.assign_sub(b)
      return a

    def swap(strategy, a_and_b):
      """Swap `a` and `b` and mirror to all devices."""
      for a, b in a_and_b:
        strategy.extended.update(a, fn_0, args=(b,))  # a = a + b
        strategy.extended.update(b, fn_1, args=(a,))  # b = a - b
        strategy.extended.update(a, fn_2, args=(b,))  # a = a - b

    ctx = tf.distribute.get_replica_context()
    return ctx.merge_call(
        swap, args=(zip(self._average_weights, self._model_weights),))

  def assign_average_vars(self, var_list: List[tf.Variable]):
    """Assign variables in var_list with their respective averages.

    Args:
      var_list: List of model variables to be assigned to their average.

    Returns:
      assign_op: The op corresponding to the assignment operation of
        variables to their average.
    """
    assign_op = tf.group([
        var.assign(self.get_slot(var, 'average')) for var in var_list
        if var.trainable
    ])
    return assign_op

  def _create_hypers(self):
    self._optimizer._create_hypers()  # pylint: disable=protected-access

  def _prepare(self, var_list):
    return self._optimizer._prepare(var_list=var_list)  # pylint: disable=protected-access

  @property
  def iterations(self):
    return self._optimizer.iterations

  @iterations.setter
  def iterations(self, variable):
    self._optimizer.iterations = variable

  @property
  def weights(self):
    # return self._weights + self._optimizer.weights
    return self._optimizer.weights

  @property
  def lr(self):
    return self._optimizer._get_hyper('learning_rate')

  @lr.setter
  def lr(self, lr):
    self._optimizer._set_hyper('learning_rate', lr)

  @property
  def learning_rate(self):
    return self._optimizer._get_hyper('learning_rate')

  @learning_rate.setter
  def learning_rate(self, learning_rate):  # pylint: disable=redefined-outer-name
    self._optimizer._set_hyper('learning_rate', learning_rate)

  def _resource_apply_dense(self, grad, var):
    return self._optimizer._resource_apply_dense(grad, var)

  def _resource_apply_sparse(self, grad, var, indices):
    return self._optimizer._resource_apply_sparse(grad, var, indices)

  def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
    return self._optimizer._resource_apply_sparse_duplicate_indices(
        grad, var, indices)

  def get_config(self):
    config = {
        'optimizer': tf.keras.optimizers.serialize(self._optimizer),
        'average_decay': self._average_decay,
        'start_step': self._start_step,
        'dynamic_decay': self._dynamic_decay,
    }
    base_config = super(MovingAverage, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    optimizer = tf.keras.optimizers.deserialize(
        config.pop('optimizer'),
        custom_objects=custom_objects,
    )
    return cls(optimizer, **config)
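Two pieces of the class above are easy to misread: the decay ramp in `update_average` and the allocation-free exchange in `_swap_weights`. Here is a minimal pure-Python sketch of both (not part of the commit; `dynamic_decay` is an illustrative helper name):

```python
def dynamic_decay(step, start_step=0, average_decay=0.99):
  """Mirrors update_average: ramps from ~0.1 toward average_decay."""
  if step < start_step:
    return 0.0
  n = step - start_step
  return min(average_decay, (1.0 + n) / (10.0 + n))

for step in (0, 1, 10, 100, 1000):
  print(step, round(dynamic_decay(step), 4))  # 0.1, 0.1818, 0.55, 0.9182, 0.99

# The three updates fn_0 / fn_1 / fn_2 form an arithmetic in-place swap,
# so no model-sized temporary buffer is ever allocated:
a, b = 2.0, 5.0  # a: shadow average, b: live model weight
a = a + b        # fn_0
b = a - b        # fn_1 -> original a
a = a - b        # fn_2 -> original b
assert (a, b) == (5.0, 2.0)
```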
def build_optimizer(
    optimizer_name: Text,
@@ -95,16 +315,17 @@ def build_optimizer(
   else:
     raise ValueError('Unknown optimizer %s' % optimizer_name)

+  if params.get('lookahead', None):
+    logging.info('Using lookahead optimizer.')
+    optimizer = tfa.optimizers.Lookahead(optimizer)
+
+  # Moving average should be applied last, as it's applied at test time
   moving_average_decay = params.get('moving_average_decay', 0.)
   if moving_average_decay is not None and moving_average_decay > 0.:
     logging.info('Including moving average decay.')
-    optimizer = tfa.optimizers.MovingAverage(
-        optimizer,
-        average_decay=params['moving_average_decay'],
-        num_updates=None)
+    optimizer = MovingAverage(
+        optimizer,
+        average_decay=moving_average_decay)
-
-  if params.get('lookahead', None):
-    logging.info('Using lookahead optimizer.')
-    optimizer = tfa.optimizers.Lookahead(optimizer)
   return optimizer
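The hunk above reorders the wrappers so that Lookahead wraps the base optimizer first and the new in-repo `MovingAverage` is applied last, which is what lets `swap_weights()` present averaged weights at evaluation time. A minimal sketch of the resulting nesting (the `params` dict is hypothetical; only the keys read in the hunk are used):

```python
import tensorflow as tf
import tensorflow_addons as tfa

params = {'lookahead': True, 'moving_average_decay': 0.99}

optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
if params.get('lookahead', None):
  optimizer = tfa.optimizers.Lookahead(optimizer)  # inner wrapper
moving_average_decay = params.get('moving_average_decay', 0.)
if moving_average_decay is not None and moving_average_decay > 0.:
  # MovingAverage is the class defined earlier in this diff; outermost wrapper.
  optimizer = MovingAverage(optimizer, average_decay=moving_average_decay)
```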
@@ -139,7 +360,8 @@ def build_learning_rate(params: base_configs.LearningRateConfig,
     lr = tf.keras.optimizers.schedules.ExponentialDecay(
         initial_learning_rate=base_lr,
         decay_steps=decay_steps,
-        decay_rate=decay_rate)
+        decay_rate=decay_rate,
+        staircase=params.staircase)
   elif decay_type == 'piecewise_constant_with_warmup':
     logging.info('Using Piecewise constant decay with warmup. '
                  'Parameters: batch_size: %d, epoch_size: %d, '
......
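For readers unfamiliar with the flag, a small sketch (constants assumed; `params.staircase` comes from the learning-rate config) of what `staircase=True` changes in the schedule above:

```python
import tensorflow as tf

smooth = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=100, decay_rate=0.5)
stepped = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=100, decay_rate=0.5, staircase=True)

print(float(smooth(50)))   # ~0.0707: 0.1 * 0.5 ** (50 / 100)
print(float(stepped(50)))  # 0.1: the exponent floors to 0 until step 100
```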
@@ -19,7 +19,7 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 from absl.testing import parameterized
 from official.vision.image_classification import optimizer_factory
@@ -35,9 +35,9 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
       ('adam', 'adam', 0., False),
       ('adamw', 'adamw', 0., False),
       ('momentum_lookahead', 'momentum', 0., True),
-      ('sgd_ema', 'sgd', 0.001, False),
-      ('momentum_ema', 'momentum', 0.001, False),
-      ('rmsprop_ema', 'rmsprop', 0.001, False))
+      ('sgd_ema', 'sgd', 0.999, False),
+      ('momentum_ema', 'momentum', 0.999, False),
+      ('rmsprop_ema', 'rmsprop', 0.999, False))
   def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
     """Smoke test to be sure no syntax errors."""
     params = {
@@ -111,5 +111,4 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
 if __name__ == '__main__':
-  assert tf.version.VERSION.startswith('2.')
   tf.test.main()
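The EMA test cases above also flip the decay from 0.001 to 0.999, which matters because the decay acts as a retention factor. A quick sketch on synthetic numbers (not from the test):

```python
def ema(values, decay):
  avg = values[0]
  for v in values[1:]:
    avg = decay * avg + (1.0 - decay) * v
  return avg

series = [0.0] * 9 + [1.0]
print(ema(series, 0.999))  # ~0.001: a decay near 1.0 averages over many steps
print(ema(series, 0.001))  # ~0.999: a tiny decay just tracks the latest value
```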
@@ -19,7 +19,7 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 from typing import List, Optional, Text, Tuple
 from official.vision.image_classification import augment
......
-This folder contains a compile/fit and
+This folder contains a
 [custom training loop (CTL)](#resnet-custom-training-loop) implementation for
 ResNet50.
@@ -21,15 +21,11 @@ version uses a ResNet50 model implemented in
 * ResNet50 TFHub: [feature vector](https://tfhub.dev/tensorflow/resnet_50/feature_vector/1)
   and [classification](https://tfhub.dev/tensorflow/resnet_50/classification/1)

-```bash
-python3 resnet_imagenet_main.py
-```

 Again, if you did not download the data to the default directory, specify the
 location with the `--data_dir` flag:

 ```bash
-python3 resnet_imagenet_main.py --data_dir=/path/to/imagenet
+python3 resnet_ctl_imagenet_main.py --data_dir=/path/to/imagenet
 ```

 There are more flag options you can specify. Here are some examples:
@@ -48,7 +44,7 @@ For example, this is a typical command line to run with ImageNet data with
 batch size 128 per GPU:

 ```bash
-python3 -m resnet_imagenet_main.py \
+python3 -m resnet_ctl_imagenet_main.py \
   --model_dir=/tmp/model_dir/something \
   --num_gpus=2 \
   --batch_size=128 \
......
@@ -166,7 +166,6 @@ def build_stats(history, eval_output, callbacks):
   if eval_output:
     stats['accuracy_top_1'] = float(eval_output[1])
     stats['eval_loss'] = float(eval_output[0])
-
   if history and history.history:
     train_hist = history.history
     # Gets final loss from training.
@@ -176,6 +175,8 @@ def build_stats(history, eval_output, callbacks):
       stats[TRAIN_TOP_1] = float(train_hist['categorical_accuracy'][-1])
     elif 'sparse_categorical_accuracy' in train_hist:
       stats[TRAIN_TOP_1] = float(train_hist['sparse_categorical_accuracy'][-1])
+    elif 'accuracy' in train_hist:
+      stats[TRAIN_TOP_1] = float(train_hist['accuracy'][-1])
   if not callbacks:
     return stats
......
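The new branch completes a three-way fallback for the top-1 metric. A minimal sketch (hypothetical `train_hist`; the real values come from a Keras `History` object) of the resulting lookup order:

```python
train_hist = {'loss': [2.3, 1.1], 'accuracy': [0.41, 0.73]}

top_1 = None
for key in ('categorical_accuracy', 'sparse_categorical_accuracy', 'accuracy'):
  if key in train_hist:
    top_1 = float(train_hist[key][-1])
    break
print(top_1)  # 0.73 -- models compiled with metrics=['accuracy'] now count
```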
@@ -22,6 +22,7 @@ from typing import Any, Mapping
 import dataclasses
+from official.modeling.hyperparams import base_config
 from official.vision.image_classification.configs import base_configs
@@ -38,12 +39,13 @@ class ResNetModelConfig(base_configs.ModelConfig):
   """Configuration for the ResNet model."""
   name: str = 'ResNet'
   num_classes: int = 1000
-  model_params: Mapping[str, Any] = dataclasses.field(default_factory=lambda: {
-      'num_classes': 1000,
-      'batch_size': None,
-      'use_l2_regularizer': True,
-      'rescale_inputs': False,
-  })
+  model_params: base_config.Config = dataclasses.field(
+      default_factory=lambda: {
+          'num_classes': 1000,
+          'batch_size': None,
+          'use_l2_regularizer': True,
+          'rescale_inputs': False,
+      })
   loss: base_configs.LossConfig = base_configs.LossConfig(
       name='sparse_categorical_crossentropy')
   optimizer: base_configs.OptimizerConfig = base_configs.OptimizerConfig(
......
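Independent of the type change to `base_config.Config`, the `default_factory` pattern in the field above is what keeps instances from sharing one mutable default. A self-contained sketch (plain `Mapping` stand-in and a `DemoConfig` class, not the garden's config types):

```python
import dataclasses
from typing import Any, Mapping

@dataclasses.dataclass
class DemoConfig:
  model_params: Mapping[str, Any] = dataclasses.field(
      default_factory=lambda: {'num_classes': 1000, 'batch_size': None})

a, b = DemoConfig(), DemoConfig()
a.model_params['num_classes'] = 10
print(b.model_params['num_classes'])  # 1000: each instance gets a fresh dict
```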
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import math
 from absl import app
 from absl import flags
 from absl import logging
@@ -81,8 +82,8 @@ def get_num_train_iterations(flags_obj):
     train_steps = min(flags_obj.train_steps, train_steps)
     train_epochs = 1
-  eval_steps = (
-      imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)
+  eval_steps = math.ceil(1.0 * imagenet_preprocessing.NUM_IMAGES['validation'] /
+                         flags_obj.batch_size)
   return train_steps, train_epochs, eval_steps
......
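The switch from floor division to `math.ceil` keeps the final partial validation batch. A tiny sketch with ImageNet's 50,000 validation images (the batch size is an assumed example):

```python
import math

num_validation, batch_size = 50000, 256
print(num_validation // batch_size)                  # 195: last 80 images dropped
print(math.ceil(1.0 * num_validation / batch_size))  # 196: every image evaluated
```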
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 from official.modeling import performance
 from official.staging.training import grad_utils
......
@@ -24,7 +24,7 @@ import os
 from absl import app
 from absl import flags
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 from official.vision.image_classification.resnet import imagenet_preprocessing
 from official.vision.image_classification.resnet import resnet_model
@@ -59,7 +59,6 @@ def main(argv):
   if len(argv) > 1:
     raise app.UsageError("Too many command-line arguments.")
-  assert tf.version.VERSION.startswith('2.')
   export_tfhub(FLAGS.model_path, FLAGS.export_path)
......
![Logo](https://storage.googleapis.com/model_garden_artifacts/TF_Model_Garden.png)
# TensorFlow Research Models
-This folder contains machine learning models implemented by researchers in
-[TensorFlow](https://tensorflow.org). The models are maintained by their
-respective authors. To propose a model for inclusion, please submit a pull
-request.
+This folder contains machine learning models implemented by researchers in [TensorFlow](https://tensorflow.org).
+
+The research models are maintained by their respective authors.
**Note: Some research models are stale and have not been updated to the latest TensorFlow 2 yet.**
---
## Frameworks / APIs with Models
| Folder | Framework | Description | Maintainer(s) |
|--------|-----------|-------------|---------------|
| [object_detection](object_detection) | TensorFlow Object Detection API | A framework that makes it easy to construct, train and deploy object detection models<br/> | jch1, tombstone, derekjchow, jesu9, dreamdragon, pkulzc |
| [slim](slim) | TensorFlow-Slim Image Classification Model Library | A lightweight high-level API of TensorFlow for defining, training and evaluating image classification models <br/>• Inception V1/V2/V3/V4<br/>• Inception-ResNet-v2<br/>• ResNet V1/V2<br/>• VGG 16/19<br/>• MobileNet V1/V2/V3<br/>• NASNet-A_Mobile/Large<br/>• PNASNet-5_Large/Mobile | sguada, nathansilberman |
---
## Models / Implementations
| Folder | Paper(s) | Description | Maintainer(s) |
|--------|----------|-------------|---------------|
| [adv_imagenet<br />_models](adv_imagenet_models) | [1] [Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)<br/>[2] [Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204) | Adversarially trained ImageNet models | alexeykurakin |
| [adversarial_crypto](adversarial_crypto) | [Learning to Protect Communications with Adversarial Neural Cryptography](https://arxiv.org/abs/1610.06918) | Code to train encoder/decoder/adversary network triplets and evaluate their effectiveness on randomly generated input and key pairs | dave-andersen |
| [adversarial<br />_logit_pairing](adversarial_logit_pairing) | [Adversarial Logit Pairing](https://arxiv.org/abs/1803.06373) | Implementation of Adversarial logit pairing paper as well as few models pre-trained on ImageNet and Tiny ImageNet | alexeykurakin |
| [adversarial_text](adversarial_text) | [1] [Adversarial Training Methods for Semi-Supervised Text](https://arxiv.org/abs/1605.07725) Classification<br/>[2] [Semi-supervised Sequence Learning](https://arxiv.org/abs/1511.01432) | Adversarial Training Methods for Semi-Supervised Text Classification| rsepassi, a-dai |
| [attention_ocr](attention_ocr) | [Attention-based Extraction of Structured Information from Street View Imagery](https://arxiv.org/abs/1704.03549) | | alexgorban |
| [audioset](audioset) | Models for AudioSet: A Large Scale Dataset of Audio Events | | plakal, dpwe |
| [autoaugment](autoaugment) | [1] [AutoAugment](https://arxiv.org/abs/1805.09501)<br/>[2] [Wide Residual Networks](https://arxiv.org/abs/1605.07146)<br/>[3] [Shake-Shake regularization](https://arxiv.org/abs/1705.07485)<br/>[4] [ShakeDrop Regularization for Deep Residual Learning](https://arxiv.org/abs/1802.02375) | Train Wide-ResNet, Shake-Shake and ShakeDrop models on CIFAR-10 and CIFAR-100 dataset with AutoAugment | barretzoph |
| [autoencoder](autoencoder) | Various autoencoders | | snurkabill |
| [brain_coder](brain_coder) | [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526) | Program synthesis with reinforcement learning | danabo |
| [cognitive_mapping<br />_and_planning](cognitive_mapping_and_planning) | [Cognitive Mapping and Planning for Visual Navigation](https://arxiv.org/abs/1702.03920) | Implementation of a spatial memory based mapping and planning architecture for visual navigation | s-gupta |
| [compression](compression) | [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148) | | nmjohn |
| [cvt_text](cvt_text) | [Semi-supervised sequence learning with cross-view training](https://arxiv.org/abs/1809.08370) | | clarkkev, lmthang |
| [deep_contextual<br />_bandits](deep_contextual_bandits) | [Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling](https://arxiv.org/abs/1802.09127) | | rikel |
| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | End-to-End Speech Recognition in English and Mandarin | |
| [deeplab](deeplab) | [1] [DeepLabv1](https://arxiv.org/abs/1412.7062)<br/>[2] [DeepLabv2](https://arxiv.org/abs/1606.00915)<br/>[3] [DeepLabv3](https://arxiv.org/abs/1802.02611)<br/>[4] [DeepLabv3+](https://arxiv.org/abs/1706.05587) | DeepLab models for semantic image segmentation | aquariusjay, yknzhu, gpapan |
| [delf](delf) | [1] [Large-Scale Image Retrieval with Attentive Deep Local Features](https://arxiv.org/abs/1612.06321) <br/>[2] [Detect-to-Retrieve](https://arxiv.org/abs/1812.01584) | DELF: DEep Local Features | andrefaraujo |
| [domain_adaptation](domain_adaptation) | [1] [Domain Separation Networks](https://arxiv.org/abs/1608.06019) <br/>[2] [Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks](https://arxiv.org/abs/1612.05424) | Code used for two domain adaptation papers| bousmalis, dmrd |
| [efficient-hrl](efficient-hrl) | [1] [Data-Efficient Hierarchical Reinforcement Learning](https://arxiv.org/abs/1805.08296)<br/>[2] [Near-Optimal Representation Learning for Hierarchical Reinforcement Learning](https://arxiv.org/abs/1810.01257) | Code for performing hierarchical reinforcement learning | ofirnachum |
| [feelvos](feelvos)| [FEELVOS](https://arxiv.org/abs/1902.09513) | Fast End-to-End Embedding Learning for Video Object Segmentation | |
| [fivo](fivo)| [Filtering variational objectives for training generative sequence models](https://arxiv.org/abs/1705.09279) | | dieterichlawson |
| [global_objectives](global_objectives) | [Scalable Learning of Non-Decomposable Objectives](https://arxiv.org/abs/1608.04802) | TensorFlow loss functions that optimize directly for a variety of objectives including AUC, recall at precision, and more | mackeya-google |
| [im2txt](im2txt) | [Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning Challenge](https://arxiv.org/abs/1609.06647) | Image-to-text neural network for image captioning| cshallue |
| [inception](inception) | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | Deep convolutional networks for computer vision | shlens, vincentvanhoucke |
| [keypointnet](keypointnet) | [KeypointNet](https://arxiv.org/abs/1807.03146) | Discovery of Latent 3D Keypoints via End-to-end Geometric Reasoning | mnorouzi |
| [learned_optimizer](learned_optimizer) | [Learned Optimizers that Scale and Generalize](https://arxiv.org/abs/1703.04813) | | olganw, nirum |
| [learning_to<br />_remember<br />_rare_events](learning_to_remember_rare_events) | [Learning to Remember Rare Events](https://arxiv.org/abs/1703.03129) | A large-scale life-long memory module for use in deep learning | lukaszkaiser, ofirnachum |
| [learning<br />_unsupervised<br />_learning](learning_unsupervised_learning) | [Meta-Learning Update Rules for Unsupervised Representation Learning](https://arxiv.org/abs/1804.00222) | A meta-learned unsupervised learning update rule| lukemetz, nirum |
| [lexnet_nc](lexnet_nc) | LexNET | Noun Compound Relation Classification | vered1986, waterson |
| [lfads](lfads) | [LFADS - Latent Factor Analysis via Dynamical Systems](https://doi.org/10.1101/152884) | Sequential variational autoencoder for analyzing neuroscience data| jazcollins, sussillo |
| [lm_1b](lm_1b) | [Exploring the Limits of Language Modeling](https://arxiv.org/abs/1602.02410) | Language modeling on the one billion word benchmark | oriolvinyals, panyx0718 |
| [lm_commonsense](lm_commonsense) | [A Simple Method for Commonsense Reasoning](https://arxiv.org/abs/1806.02847) | Commonsense reasoning using language models | thtrieu |
| [lstm_object_detection](lstm_object_detection) | [Mobile Video Object Detection with Temporally-Aware Feature Maps](https://arxiv.org/abs/1711.06368) | | dreamdragon, masonliuw, yinxiaoli, yongzhe2160 |
| [marco](marco) | [Classification of crystallization outcomes using deep convolutional neural networks](https://arxiv.org/abs/1803.10342) | | vincentvanhoucke |
| [maskgan](maskgan)| [MaskGAN: Better Text Generation via Filling in the______](https://arxiv.org/abs/1801.07736) | Text generation with GANs | a-dai |
| [namignizer](namignizer)| Namignizer | Recognize and generate names | knathanieltucker |
| [neural_gpu](neural_gpu)| [Neural GPUs Learn Algorithms](https://arxiv.org/abs/1511.08228) | Highly parallel neural computer | lukaszkaiser |
| [neural_programmer](neural_programmer) | [Learning a Natural Language Interface with Neural Programmer](https://arxiv.org/abs/1611.08945) | Neural network augmented with logic and mathematic operations| arvind2505 |
| [next_frame<br />_prediction](next_frame_prediction) | [Visual Dynamics](https://arxiv.org/abs/1607.02586) | Probabilistic Future Frame Synthesis via Cross Convolutional Networks| panyx0718 |
| [pcl_rl](pcl_rl) | [1] [Improving Policy Gradient by Exploring Under-appreciated Rewards](https://arxiv.org/abs/1611.09321)<br/>[2] [Bridging the Gap Between Value and Policy Based Reinforcement Learning](https://arxiv.org/abs/1702.08892)<br/>[3] [Trust-PCL: An Off-Policy Trust Region Method for Continuous Control](https://arxiv.org/abs/1707.01891) | Code for several reinforcement learning algorithms | ofirnachum |
| [ptn](ptn) | [Perspective Transformer Nets](https://arxiv.org/abs/1612.00814) | Learning Single-View 3D Object Reconstruction without 3D Supervision | xcyan, arkanath, hellojas, honglaklee |
| [qa_kg](qa_kg) | [Learning to Reason](https://arxiv.org/abs/1704.05526) | End-to-End Module Networks for Visual Question Answering | yuyuz |
| [real_nvp](real_nvp) | [Density estimation using Real NVP](https://arxiv.org/abs/1605.08803) | | laurent-dinh |
| [rebar](rebar) | [REBAR](https://arxiv.org/abs/1703.07370) | Low-variance, unbiased gradient estimates for discrete latent variable models | gjtucker |
| [sentiment<br />_analysis](sentiment_analysis)| [Effective Use of Word Order for Text Categorization with Convolutional Neural Networks](https://arxiv.org/abs/1412.1058) | | sculd |
| [seq2species](seq2species) | [Seq2Species: A deep learning approach to pattern recognition for short DNA sequences](https://doi.org/10.1101/353474) | Neural Network Models for Species Classification| apbusia, depristo |
| [skip_thoughts](skip_thoughts) | [Skip-Thought Vectors](https://arxiv.org/abs/1506.06726) | Recurrent neural network sentence-to-vector encoder | cshallue|
| [steve](steve) | [Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion](https://arxiv.org/abs/1807.01675) | A hybrid model-based/model-free reinforcement learning algorithm for sample-efficient continuous control | buckman-google |
| [street](street) | [End-to-End Interpretation of the French Street Name Signs Dataset](https://arxiv.org/abs/1702.03970) | Identify the name of a street (in France) from an image using a Deep RNN| theraysmith |
| [struct2depth](struct2depth)| [Depth Prediction Without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos](https://arxiv.org/abs/1811.06152) | Unsupervised learning of depth and ego-motion| aneliaangelova |
| [swivel](swivel) | [Swivel: Improving Embeddings by Noticing What's Missing](https://arxiv.org/abs/1602.02215) | The Swivel algorithm for generating word embeddings | waterson |
| [tcn](tcn) | [Time-Contrastive Networks: Self-Supervised Learning from Video](https://arxiv.org/abs/1704.06888) | Self-supervised representation learning from multi-view video | coreylynch, sermanet |
| [textsum](textsum)| Sequence-to-sequence with attention model for text summarization | | panyx0718, peterjliu |
| [transformer](transformer) | [Spatial Transformer Network](https://arxiv.org/abs/1506.02025) | Spatial transformer network that allows the spatial manipulation of data within the network| daviddao|
| [vid2depth](vid2depth) | [Unsupervised Learning of Depth and Ego-Motion from Monocular Video Using 3D Geometric Constraints](https://arxiv.org/abs/1802.05522) | Learning depth and ego-motion unsupervised from raw monocular video | rezama |
| [video<br />_prediction](video_prediction) | [Unsupervised Learning for Physical Interaction through Video Prediction](https://arxiv.org/abs/1605.07157) | Predicting future video frames with neural advection| cbfinn |
+---
+
+## Contributions
+
+If you want to contribute a new model, please submit a pull request.

-**Note: some research models are stale and have not been updated to the latest
-TensorFlow yet. If users have trouble with TF 2.x for research models,
-please consider TF 1.15.**
-
-## Models
-
-- [adversarial_crypto](adversarial_crypto): protecting communications with
-  adversarial neural cryptography.
-- [adversarial_text](adversarial_text): semi-supervised sequence learning with
-  adversarial training.
-- [attention_ocr](attention_ocr): a model for real-world image text
-  extraction.
-- [audioset](audioset): Models and supporting code for use with
-  [AudioSet](http://g.co/audioset).
-- [autoencoder](autoencoder): various autoencoders.
-- [brain_coder](brain_coder): Program synthesis with reinforcement learning.
-- [cognitive_mapping_and_planning](cognitive_mapping_and_planning):
-  implementation of a spatial memory based mapping and planning architecture
-  for visual navigation.
-- [compression](compression): compressing and decompressing images using a
-  pre-trained Residual GRU network.
-- [cvt_text](cvt_text): semi-supervised sequence learning with cross-view
-  training.
-- [deep_contextual_bandits](deep_contextual_bandits): code for a variety of contextual bandits algorithms using deep neural networks and Thompson sampling.
-- [deep_speech](deep_speech): automatic speech recognition.
-- [deeplab](deeplab): deep labeling for semantic image segmentation.
-- [delf](delf): deep local features for image matching and retrieval.
-- [domain_adaptation](domain_adaptation): domain separation networks.
-- [fivo](fivo): filtering variational objectives for training generative
-  sequence models.
-- [im2txt](im2txt): image-to-text neural network for image captioning.
-- [inception](inception): deep convolutional networks for computer vision.
-- [keypointnet](keypointnet): discovery of latent 3D keypoints via end-to-end
-  geometric reasoning [[demo](https://keypointnet.github.io/)].
-- [learning_to_remember_rare_events](learning_to_remember_rare_events): a
-  large-scale life-long memory module for use in deep learning.
-- [learning_unsupervised_learning](learning_unsupervised_learning): a
-  meta-learned unsupervised learning update rule.
-- [lexnet_nc](lexnet_nc): a distributed model for noun compound relationship
-  classification.
-- [lfads](lfads): sequential variational autoencoder for analyzing
-  neuroscience data.
-- [lm_1b](lm_1b): language modeling on the one billion word benchmark.
-- [lm_commonsense](lm_commonsense): commonsense reasoning using language models.
-- [maskgan](maskgan): text generation with GANs.
-- [namignizer](namignizer): recognize and generate names.
-- [neural_gpu](neural_gpu): highly parallel neural computer.
-- [neural_programmer](neural_programmer): neural network augmented with logic
-  and mathematic operations.
-- [next_frame_prediction](next_frame_prediction): probabilistic future frame
-  synthesis via cross convolutional networks.
-- [object_detection](object_detection): localizing and identifying multiple
-  objects in a single image.
-- [pcl_rl](pcl_rl): code for several reinforcement learning algorithms,
-  including Path Consistency Learning.
-- [ptn](ptn): perspective transformer nets for 3D object reconstruction.
-- [marco](marco): automating the evaluation of crystallization experiments.
-- [qa_kg](qa_kg): module networks for question answering on knowledge graphs.
-- [real_nvp](real_nvp): density estimation using real-valued non-volume
-  preserving (real NVP) transformations.
-- [rebar](rebar): low-variance, unbiased gradient estimates for discrete
-  latent variable models.
-- [seq2species](seq2species): deep learning solution for read-level taxonomic
-  classification.
-- [skip_thoughts](skip_thoughts): recurrent neural network sentence-to-vector
-  encoder.
-- [slim](slim): image classification models in TF-Slim.
-- [street](street): identify the name of a street (in France) from an image
-  using a Deep RNN.
-- [struct2depth](struct2depth): unsupervised learning of depth and ego-motion.
-- [swivel](swivel): the Swivel algorithm for generating word embeddings.
-- [tcn](tcn): Self-supervised representation learning from multi-view video.
-- [textsum](textsum): sequence-to-sequence with attention model for text
-  summarization.
-- [transformer](transformer): spatial transformer network, which allows the
-  spatial manipulation of data within the network.
-- [vid2depth](vid2depth): learning depth and ego-motion unsupervised from
-  raw monocular video.
-- [video_prediction](video_prediction): predicting future video frames with
-  neural advection.
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Adversarially trained ImageNet models

Pre-trained ImageNet models from the following papers:
......
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Learning to Protect Communications with Adversarial Neural Cryptography

This is a slightly-updated model used for the paper
......
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Adversarial logit pairing
......
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Adversarial Text Classification

Code for [*Adversarial Training Methods for Semi-Supervised Text Classification*](https://arxiv.org/abs/1605.07725) and [*Semi-Supervised Sequence Learning*](https://arxiv.org/abs/1511.01432).
......
# AstroNet has moved!
The code is now located at https://github.com/google-research/exoplanet-ml
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Models for AudioSet: A Large Scale Dataset of Audio Events

This repository provides models and supporting code associated with
......
@@ -38,7 +38,7 @@ Here's a sample installation and test session:
 ```shell
 # Upgrade pip first. Also make sure wheel is installed.
-python -m pip install --upgrade pip wheel.
+python -m pip install --upgrade pip wheel

 # Install dependencies.
 pip install numpy resampy tensorflow soundfile
......
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Cognitive Mapping and Planning for Visual Navigation

**Saurabh Gupta, James Davidson, Sergey Levine, Rahul Sukthankar, Jitendra Malik**
......