"tools/git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "edca71da3733587377e6ad5c54125f3968466947"
Unverified commit 440e0eec, authored by Stephen Wu and committed via GitHub

Merge branch 'master' into RTESuperGLUE

parents 51364cdf 9815ea67
@@ -14,17 +14,6 @@ can take full advantage of TensorFlow for their research and product development
## [Announcements](https://github.com/tensorflow/models/wiki/Announcements)
| Date | News |
|------|------|
| July 10, 2020 | TensorFlow 2 meets the [Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) ([Blog](https://blog.tensorflow.org/2020/07/tensorflow-2-meets-object-detection-api.html)) |
| June 30, 2020 | [SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization](https://github.com/tensorflow/models/tree/master/official/vision/detection#train-a-spinenet-49-based-mask-r-cnn) released ([Tweet](https://twitter.com/GoogleAI/status/1278016712978264064)) |
| June 17, 2020 | [Context R-CNN: Long Term Temporal Context for Per-Camera Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection#june-17th-2020) released ([Tweet](https://twitter.com/GoogleAI/status/1276571419422253057)) |
| May 21, 2020 | [Unifying Deep Local and Global Features for Image Search (DELG)](https://github.com/tensorflow/models/tree/master/research/delf#delg) code released |
| May 19, 2020 | [MobileDets: Searching for Object Detection Architectures for Mobile Accelerators](https://github.com/tensorflow/models/tree/master/research/object_detection#may-19th-2020) released |
| May 7, 2020 | [MnasFPN with MobileNet-V2 backbone](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#mobile-models) released for object detection |
| May 1, 2020 | [DELF: DEep Local Features](https://github.com/tensorflow/models/tree/master/research/delf) updated to support TensorFlow 2.1 |
| March 31, 2020 | [Introducing the Model Garden for TensorFlow 2](https://blog.tensorflow.org/2020/03/introducing-model-garden-for-tensorflow-2.html) ([Tweet](https://twitter.com/TensorFlow/status/1245029834633297921)) |
## Contributions
[![help wanted:paper implementation](https://img.shields.io/github/issues/tensorflow/models/help%20wanted%3Apaper%20implementation)](https://github.com/tensorflow/models/labels/help%20wanted%3Apaper%20implementation)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
@@ -26,6 +26,8 @@ import tensorflow as tf
from official.core import base_task
from official.core import config_definitions
from official.modeling import optimization
ExperimentConfig = config_definitions.ExperimentConfig
TrainerConfig = config_definitions.TrainerConfig
@@ -119,6 +121,10 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
self._checkpoint_exporter = checkpoint_exporter
self._recovery = None
# Creates a shadow copy of the weights to store weights moving average.
if isinstance(self._optimizer, optimization.ExponentialMovingAverage):
self._optimizer.shadow_copy(self._model)
# global_step increases by 1 after each training iteration.
# We should have global_step.numpy() == self.optimizer.iterations.numpy()
# when there is only 1 optimizer.
@@ -209,7 +215,10 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
@property
def optimizer(self):
if hasattr(self, "_optimizer"):
return self._optimizer
else:
return None
@property
def global_step(self):
@@ -294,6 +303,10 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
"""Sets up metrics."""
for metric in self.validation_metrics + [self.validation_loss]:
metric.reset_states()
# Swaps weights to test on weights moving average.
if self.optimizer and isinstance(
self.optimizer, optimization.ExponentialMovingAverage):
self.optimizer.swap_weights()
def eval_step(self, iterator):
"""See base class."""
@@ -331,6 +344,12 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
logs["best_" +
metric_name] = self._checkpoint_exporter.best_ckpt_logs[metric_name]
# Swaps back weights after testing when EMA is used.
# This happens after best checkpoint export so that average weights used for
# eval are exported instead of regular weights.
if self.optimizer and isinstance(
self.optimizer, optimization.ExponentialMovingAverage):
self.optimizer.swap_weights()
return logs
def eval_reduce(self, state=None, step_outputs=None):
......
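The EMA handling above (a `shadow_copy` at construction time, `swap_weights` before and after evaluation) can be pictured with a small standalone sketch. This is illustrative only and is not the `optimization.ExponentialMovingAverage` implementation; the class name and method bodies below are assumptions made for exposition.

```python
import tensorflow as tf

class EmaSketch:
  """Minimal EMA-style weight averaging with a swap, for illustration only."""

  def __init__(self, decay=0.999):
    self._decay = decay
    self._model_vars = None
    self._shadow = None

  def shadow_copy(self, model):
    # One non-trainable shadow variable per model variable.
    self._model_vars = model.variables
    self._shadow = [tf.Variable(v, trainable=False) for v in self._model_vars]

  def update(self):
    # After each train step: shadow <- decay * shadow + (1 - decay) * weights.
    for s, v in zip(self._shadow, self._model_vars):
      s.assign(self._decay * s + (1.0 - self._decay) * v)

  def swap_weights(self):
    # Exchange live weights and averaged weights; calling this twice restores
    # the originals, which is why the trainer swaps before eval and swaps back
    # after the best checkpoint has been exported.
    for s, v in zip(self._shadow, self._model_vars):
      tmp = tf.identity(v)
      v.assign(s)
      s.assign(tmp)
```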
@@ -32,6 +32,7 @@ from official.core import base_task
from official.core import base_trainer as trainer_lib
from official.core import config_definitions
from official.modeling.progressive import policies
from official.modeling.progressive import utils
ExperimentConfig = config_definitions.ExperimentConfig
@@ -61,26 +62,6 @@ class ProgressiveTrainerConfig(config_definitions.TrainerConfig):
export_only_final_stage_ckpt: bool = True
class CheckpointWithHooks(tf.train.Checkpoint):
"""Same as tf.train.Checkpoint but supports hooks.
When running continuous_eval jobs, when a new checkpoint arrives, we have to
update our model and optimizer etc. to match the stage_id of the checkpoint.
However, when orbit loads a checkpoint, it does not inform us. So we use this
class to update our model to the correct stage before checkpoint restore.
"""
def __init__(self, before_load_hook, **kwargs):
self._before_load_hook = before_load_hook
super(CheckpointWithHooks, self).__init__(**kwargs)
# override
def read(self, save_path, options=None):
self._before_load_hook(save_path)
logging.info('Ran before_load_hook.')
super(CheckpointWithHooks, self).read(save_path=save_path, options=options)
@gin.configurable
class ProgressiveTrainer(trainer_lib.Trainer):
"""Implements the progressive trainer shared for TensorFlow models."""
@@ -124,7 +105,7 @@ class ProgressiveTrainer(trainer_lib.Trainer):
self._global_step = orbit.utils.create_global_step()
self._checkpoint = utils.CheckpointWithHooks(
before_load_hook=self._update_pt_stage_from_ckpt,
global_step=self.global_step,
**self._task.cur_checkpoint_items)
......
@@ -14,6 +14,9 @@
"""Util classes and functions."""
from absl import logging
import tensorflow as tf
# pylint: disable=g-direct-tensorflow-import
from tensorflow.python.training.tracking import tracking
@@ -29,3 +32,25 @@ class VolatileTrackable(tracking.AutoTrackable):
for k, v in kwargs.items():
delattr(self, k)  # untrack this object
setattr(self, k, v)  # track the new object
class CheckpointWithHooks(tf.train.Checkpoint):
"""Same as tf.train.Checkpoint but supports hooks.
In progressive training, use this class instead of tf.train.Checkpoint.
Because the network architecture changes between stages, the model has to be
prepared (for example, switched to the architecture that matches the
checkpoint) before the checkpoint is loaded. This class supports a hook that
is executed before checkpoint loading.
"""
def __init__(self, before_load_hook, **kwargs):
self._before_load_hook = before_load_hook
super(CheckpointWithHooks, self).__init__(**kwargs)
# override
def read(self, save_path, options=None):
self._before_load_hook(save_path)
logging.info('Ran before_load_hook.')
super(CheckpointWithHooks, self).read(save_path=save_path, options=options)
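A minimal usage sketch of `CheckpointWithHooks` follows. The hook body, model, and output path are placeholders, not part of the library; in real progressive training the hook is where the model is rebuilt to match the stage stored in the checkpoint.

```python
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
model(tf.zeros([1, 4]))  # Create the variables before checkpointing.

def switch_to_matching_stage(save_path):
  # Placeholder hook: prepare/rebuild the model before variables are restored.
  print('Preparing model before restoring from', save_path)

ckpt = CheckpointWithHooks(before_load_hook=switch_to_matching_stage, model=model)
path = ckpt.write('/tmp/ckpt_with_hooks_demo')
ckpt.read(path)  # Runs the hook first, then restores variables as usual.
```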
@@ -17,8 +17,8 @@
import collections
import csv
import importlib
import json
import os
from absl import logging
import tensorflow as tf
@@ -1285,6 +1285,7 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
return feature
class AXgProcessor(DataProcessor):
"""Processor for the AXg dataset (SuperGLUE diagnostics dataset)."""
......
@@ -244,7 +244,6 @@ def generate_classifier_dataset():
classifier_data_lib.AXgProcessor,
"rte-superglue":
classifier_data_lib.RTESuperGLUEProcessor
}
task_name = FLAGS.classification_task_name.lower()
if task_name not in processors:
......
@@ -61,8 +61,7 @@ class BertPretrainDataLoader(data_loader.DataLoader):
self._use_next_sentence_label = params.use_next_sentence_label
self._use_position_id = params.use_position_id
def _name_to_features(self):
name_to_features = {
'input_mask':
tf.io.FixedLenFeature([self._seq_length], tf.int64),
@@ -89,7 +88,11 @@ class BertPretrainDataLoader(data_loader.DataLoader):
if self._use_position_id:
name_to_features['position_ids'] = tf.io.FixedLenFeature(
[self._seq_length], tf.int64)
return name_to_features
def _decode(self, record: tf.Tensor):
"""Decodes a serialized tf.Example."""
name_to_features = self._name_to_features()
example = tf.io.parse_single_example(record, name_to_features)
# tf.Example only supports tf.int64, but the TPU only supports tf.int32.
......
@@ -14,12 +14,16 @@
# limitations under the License.
# ==============================================================================
"""Loads dataset for the sentence prediction (classification) task."""
from typing import List, Mapping, Optional
import dataclasses
import tensorflow as tf
import tensorflow_hub as hub
from official.common import dataset_fn
from official.core import config_definitions as cfg
from official.core import input_reader
from official.nlp import modeling
from official.nlp.data import data_loader
from official.nlp.data import data_loader_factory
@@ -89,3 +93,152 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
reader = input_reader.InputReader(
params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
return reader.read(input_context)
@dataclasses.dataclass
class SentencePredictionTextDataConfig(cfg.DataConfig):
"""Data config for sentence prediction task with raw text."""
# Either set `input_path`...
input_path: str = ''
# ...or `tfds_name` and `tfds_split` to specify input.
tfds_name: str = ''
tfds_split: str = ''
# Either `int` or `float`.
label_type: str = 'int'
# The name of the text feature fields. The text features will be
# concatenated in order.
text_fields: Optional[List[str]] = None
label_field: str = 'label'
global_batch_size: int = 32
seq_length: int = 128
is_training: bool = True
# Either build preprocessing with Python code by specifying these values
# for modeling.layers.BertTokenizer()/SentencepieceTokenizer()....
tokenization: str = 'WordPiece' # WordPiece or SentencePiece
# Text vocab file if tokenization is WordPiece, or sentencepiece.ModelProto
# file if tokenization is SentencePiece.
vocab_file: str = ''
lower_case: bool = True
# ...or load preprocessing from a SavedModel at this location.
preprocessing_hub_module_url: str = ''
# Either tfrecord, sstable, or recordio.
file_type: str = 'tfrecord'
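For concreteness, the two preprocessing modes above could be configured as follows; all file paths are placeholders, and the TF Hub URL is the one used in the tests further down.

```python
# WordPiece preprocessing built in Python from a vocab file.
wordpiece_cfg = SentencePredictionTextDataConfig(
    input_path='/path/to/train.tf_record',
    text_fields=['sentence1', 'sentence2'],
    vocab_file='/path/to/vocab.txt',
    lower_case=True)

# Preprocessing loaded from a TF Hub SavedModel instead, reading from TFDS.
hub_cfg = SentencePredictionTextDataConfig(
    tfds_name='glue/mrpc',
    tfds_split='train',
    text_fields=['sentence1', 'sentence2'],
    preprocessing_hub_module_url=(
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'))
```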
class TextProcessor(tf.Module):
"""Text features processing for sentence prediction task."""
def __init__(self,
seq_length: int,
vocab_file: Optional[str] = None,
tokenization: Optional[str] = None,
lower_case: Optional[bool] = True,
preprocessing_hub_module_url: Optional[str] = None):
if preprocessing_hub_module_url:
self._preprocessing_hub_module = hub.load(preprocessing_hub_module_url)
self._tokenizer = self._preprocessing_hub_module.tokenize
def set_shape(t):
# Before TF2.4, the sequence length dimension loaded from the
# preprocessing hub module is None, so we recover the shape here.
# TODO(b/157636658): Remove once TF2.4 is released and being used.
t.set_shape([None, seq_length])
return t
def pack_inputs_fn(inputs):
result = self._preprocessing_hub_module.bert_pack_inputs(
inputs, seq_length=seq_length)
result = tf.nest.map_structure(set_shape, result)
return result
self._pack_inputs = pack_inputs_fn
return
if tokenization == 'WordPiece':
self._tokenizer = modeling.layers.BertTokenizer(
vocab_file=vocab_file, lower_case=lower_case)
elif tokenization == 'SentencePiece':
self._tokenizer = modeling.layers.SentencepieceTokenizer(
model_file_path=vocab_file, lower_case=lower_case,
strip_diacritics=True) # Strip diacritics to follow ALBERT model
else:
raise ValueError('Unsupported tokenization: %s' % tokenization)
self._pack_inputs = modeling.layers.BertPackInputs(
seq_length=seq_length,
special_tokens_dict=self._tokenizer.get_special_tokens_dict())
def __call__(self, segments):
segments = [self._tokenizer(s) for s in segments]
# BertTokenizer returns a RaggedTensor with shape [batch, word, subword],
# and SentencepieceTokenizer returns a RaggedTensor with shape
# [batch, sentencepiece],
segments = [
tf.cast(x.merge_dims(1, -1) if x.shape.rank > 2 else x, tf.int32)
for x in segments
]
return self._pack_inputs(segments)
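The `merge_dims(1, -1)` step above can be illustrated with a tiny standalone example that uses a ragged tensor directly (no tokenizer involved): a `[batch, word, subword]` RaggedTensor is flattened to `[batch, subword]` before packing.

```python
import tensorflow as tf

tokens = tf.ragged.constant([[[101, 102], [103]], [[104]]])  # [batch, word, subword]
print(tokens.shape.rank)           # 3 (WordPiece-style output)
flat = tokens.merge_dims(1, -1)    # -> [batch, subword]
print(flat.to_list())              # [[101, 102, 103], [104]]
```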
@data_loader_factory.register_data_loader_cls(SentencePredictionTextDataConfig)
class SentencePredictionTextDataLoader(data_loader.DataLoader):
"""Loads dataset with raw text for sentence prediction task."""
def __init__(self, params):
if bool(params.tfds_name) != bool(params.tfds_split):
raise ValueError('`tfds_name` and `tfds_split` should be specified or '
'unspecified at the same time.')
if bool(params.tfds_name) == bool(params.input_path):
raise ValueError('Must specify either `tfds_name` and `tfds_split` '
'or `input_path`.')
if not params.text_fields:
raise ValueError('Unexpected empty text fields.')
if bool(params.vocab_file) == bool(params.preprocessing_hub_module_url):
raise ValueError('Must specify exactly one of vocab_file (with matching '
'lower_case flag) or preprocessing_hub_module_url.')
self._params = params
self._text_fields = params.text_fields
self._label_field = params.label_field
self._label_type = params.label_type
self._text_processor = TextProcessor(
seq_length=params.seq_length,
vocab_file=params.vocab_file,
tokenization=params.tokenization,
lower_case=params.lower_case,
preprocessing_hub_module_url=params.preprocessing_hub_module_url)
def _bert_preprocess(self, record: Mapping[str, tf.Tensor]):
"""Berts preprocess."""
segments = [record[x] for x in self._text_fields]
model_inputs = self._text_processor(segments)
y = record[self._label_field]
return model_inputs, y
def _decode(self, record: tf.Tensor):
"""Decodes a serialized tf.Example."""
name_to_features = {}
for text_field in self._text_fields:
name_to_features[text_field] = tf.io.FixedLenFeature([], tf.string)
label_type = LABEL_TYPES_MAP[self._label_type]
name_to_features[self._label_field] = tf.io.FixedLenFeature([], label_type)
example = tf.io.parse_single_example(record, name_to_features)
# tf.Example only supports tf.int64, but the TPU only supports tf.int32.
# So cast all int64 to int32.
for name in example:
t = example[name]
if t.dtype == tf.int64:
t = tf.cast(t, tf.int32)
example[name] = t
return example
def load(self, input_context: Optional[tf.distribute.InputContext] = None):
"""Returns a tf.dataset.Dataset."""
reader = input_reader.InputReader(
dataset_fn=dataset_fn.pick_dataset_fn(self._params.file_type),
decoder_fn=self._decode if self._params.input_path else None,
params=self._params,
postprocess_fn=self._bert_preprocess)
return reader.read(input_context)
@@ -20,10 +20,11 @@ from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from sentencepiece import SentencePieceTrainer
from official.nlp.data import sentence_prediction_dataloader as loader
def _create_fake_preprocessed_dataset(output_path, seq_length, label_type):
"""Creates a fake dataset."""
writer = tf.io.TFRecordWriter(output_path)
@@ -54,6 +55,70 @@ def _create_fake_dataset(output_path, seq_length, label_type):
writer.close()
def _create_fake_raw_dataset(output_path, text_fields, label_type):
"""Creates a fake tf record file."""
writer = tf.io.TFRecordWriter(output_path)
def create_str_feature(value):
f = tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
return f
def create_int_feature(values):
f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
return f
def create_float_feature(values):
f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
return f
for _ in range(100):
features = {}
for text_field in text_fields:
features[text_field] = create_str_feature([b'hello world'])
if label_type == 'int':
features['label'] = create_int_feature([0])
elif label_type == 'float':
features['label'] = create_float_feature([0.5])
else:
raise ValueError('Unexpected label_type: %s' % label_type)
tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())
writer.close()
def _create_fake_sentencepiece_model(output_dir):
vocab = ['a', 'b', 'c', 'd', 'e', 'abc', 'def', 'ABC', 'DEF']
model_prefix = os.path.join(output_dir, 'spm_model')
input_text_file_path = os.path.join(output_dir, 'train_input.txt')
with tf.io.gfile.GFile(input_text_file_path, 'w') as f:
f.write(' '.join(vocab + ['\n']))
# Add 7 more tokens: <pad>, <unk>, [CLS], [SEP], [MASK], <s>, </s>.
full_vocab_size = len(vocab) + 7
flags = dict(
model_prefix=model_prefix,
model_type='word',
input=input_text_file_path,
pad_id=0,
unk_id=1,
control_symbols='[CLS],[SEP],[MASK]',
vocab_size=full_vocab_size,
bos_id=full_vocab_size - 2,
eos_id=full_vocab_size - 1)
SentencePieceTrainer.Train(' '.join(
['--{}={}'.format(k, v) for k, v in flags.items()]))
return model_prefix + '.model'
def _create_fake_vocab_file(vocab_file_path):
tokens = ['[PAD]']
for i in range(1, 100):
tokens.append('[unused%d]' % i)
tokens.extend(['[UNK]', '[CLS]', '[SEP]', '[MASK]', 'hello', 'world'])
with tf.io.gfile.GFile(vocab_file_path, 'w') as outfile:
outfile.write('\n'.join(tokens))
class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(('int', tf.int32), ('float', tf.float32))
@@ -61,14 +126,13 @@ class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase):
input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
batch_size = 10
seq_length = 128
_create_fake_preprocessed_dataset(input_path, seq_length, label_type)
data_config = loader.SentencePredictionDataConfig(
input_path=input_path,
seq_length=seq_length,
global_batch_size=batch_size,
label_type=label_type)
dataset = loader.SentencePredictionDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_mask', 'input_type_ids'],
features.keys())
@@ -79,5 +143,108 @@ class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase):
self.assertEqual(labels.dtype, expected_label_type)
class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
parameterized.TestCase):
@parameterized.parameters(True, False)
def test_python_wordpiece_preprocessing(self, use_tfds):
batch_size = 10
seq_length = 256 # Non-default value.
lower_case = True
tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
text_fields = ['sentence1', 'sentence2']
if not use_tfds:
_create_fake_raw_dataset(tf_record_path, text_fields, label_type='int')
vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt')
_create_fake_vocab_file(vocab_file_path)
data_config = loader.SentencePredictionTextDataConfig(
input_path='' if use_tfds else tf_record_path,
tfds_name='glue/mrpc' if use_tfds else '',
tfds_split='train' if use_tfds else '',
text_fields=text_fields,
global_batch_size=batch_size,
seq_length=seq_length,
is_training=True,
lower_case=lower_case,
vocab_file=vocab_file_path)
dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'],
features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(labels.shape, (batch_size,))
@parameterized.parameters(True, False)
def test_python_sentencepiece_preprocessing(self, use_tfds):
batch_size = 10
seq_length = 256 # Non-default value.
lower_case = True
tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
text_fields = ['sentence1', 'sentence2']
if not use_tfds:
_create_fake_raw_dataset(tf_record_path, text_fields, label_type='int')
sp_model_file_path = _create_fake_sentencepiece_model(self.get_temp_dir())
data_config = loader.SentencePredictionTextDataConfig(
input_path='' if use_tfds else tf_record_path,
tfds_name='glue/mrpc' if use_tfds else '',
tfds_split='train' if use_tfds else '',
text_fields=text_fields,
global_batch_size=batch_size,
seq_length=seq_length,
is_training=True,
lower_case=lower_case,
tokenization='SentencePiece',
vocab_file=sp_model_file_path,
)
dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'],
features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(labels.shape, (batch_size,))
@parameterized.parameters(True, False)
def test_saved_model_preprocessing(self, use_tfds):
batch_size = 10
seq_length = 256 # Non-default value.
tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
text_fields = ['sentence1', 'sentence2']
if not use_tfds:
_create_fake_raw_dataset(tf_record_path, text_fields, label_type='float')
vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt')
_create_fake_vocab_file(vocab_file_path)
data_config = loader.SentencePredictionTextDataConfig(
input_path='' if use_tfds else tf_record_path,
tfds_name='glue/mrpc' if use_tfds else '',
tfds_split='train' if use_tfds else '',
text_fields=text_fields,
global_batch_size=batch_size,
seq_length=seq_length,
is_training=True,
preprocessing_hub_module_url=(
'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'),
label_type='int' if use_tfds else 'float',
)
dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'],
features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(labels.shape, (batch_size,))
if __name__ == '__main__':
tf.test.main()
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Tool to generate api_docs for tensorflow_models/official library.
Example:
python build_docs \
--output_dir=/tmp/api_docs \
--project_short_name=tf_nlp.modeling \
--project_full_name="TensorFlow Official Models - NLP Modeling Library"
"""
import os
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from tensorflow_docs.api_generator import doc_controls
from tensorflow_docs.api_generator import generate_lib
from tensorflow_docs.api_generator import public_api
from official.nlp import modeling as tf_nlp_modeling
FLAGS = flags.FLAGS
flags.DEFINE_string('output_dir', None, 'Where to write the resulting docs to.')
flags.DEFINE_string(
'code_url_prefix',
'https://github.com/tensorflow/models/blob/master/official/nlp/modeling/',
'The url prefix for links to code.')
flags.DEFINE_bool('search_hints', True,
'Include metadata search hints in the generated files')
flags.DEFINE_string('site_path', 'tf_nlp_modeling/api_docs/python',
'Path prefix in the _toc.yaml')
flags.DEFINE_bool('gen_report', False,
'Generate an API report containing the health of the '
'docstrings of the public API.')
flags.DEFINE_string(
'project_short_name', 'tf_nlp.modeling',
'The project short name referring to the python module to document.')
flags.DEFINE_string('project_full_name',
'TensorFlow Official Models - NLP Modeling Library',
'The main title for the project.')
def _hide_module_model_and_layer_methods():
"""Hide methods and properties defined in the base classes of Keras layers.
We hide all methods and properties of the base classes, except:
- `__init__` is always documented.
- `call` is always documented, as it can carry important information for
complex layers.
"""
module_contents = list(tf.Module.__dict__.items())
model_contents = list(tf.keras.Model.__dict__.items())
layer_contents = list(tf.keras.layers.Layer.__dict__.items())
for name, obj in module_contents + layer_contents + model_contents:
if name == '__init__':
# Always document __init__.
continue
if name == 'call':
# Always document `call`.
if hasattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS): # pylint: disable=protected-access
delattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS) # pylint: disable=protected-access
continue
# Otherwise, exclude from documentation.
if isinstance(obj, property):
obj = obj.fget
if isinstance(obj, (staticmethod, classmethod)):
obj = obj.__func__
try:
doc_controls.do_not_doc_in_subclasses(obj)
except AttributeError:
pass
def gen_api_docs(code_url_prefix, site_path, output_dir, gen_report,
project_short_name, project_full_name, search_hints):
"""Generates api docs for the tensorflow docs package."""
_hide_module_model_and_layer_methods()
doc_generator = generate_lib.DocGenerator(
root_title=project_full_name,
py_modules=[(project_short_name, tf_nlp_modeling)],
base_dir=os.path.dirname(tf_nlp_modeling.__file__),
code_url_prefix=code_url_prefix,
search_hints=search_hints,
site_path=site_path,
gen_report=gen_report,
callbacks=[public_api.explicit_package_contents_filter],
)
doc_generator.build(output_dir)
logging.info('Output docs to: %s', output_dir)
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
gen_api_docs(
code_url_prefix=FLAGS.code_url_prefix,
site_path=FLAGS.site_path,
output_dir=FLAGS.output_dir,
gen_report=FLAGS.gen_report,
project_short_name=FLAGS.project_short_name,
project_full_name=FLAGS.project_full_name,
search_hints=FLAGS.search_hints)
if __name__ == '__main__':
flags.mark_flag_as_required('output_dir')
app.run(main)
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.tools.build_docs."""
import os
import shutil
import tensorflow as tf
from official.utils.docs import build_docs
class BuildDocsTest(tf.test.TestCase):
def setUp(self):
super(BuildDocsTest, self).setUp()
self.workdir = self.get_temp_dir()
if os.path.exists(self.workdir):
shutil.rmtree(self.workdir)
os.makedirs(self.workdir)
def test_api_gen(self):
build_docs.gen_api_docs(
code_url_prefix="http://official/nlp/modeling/",
site_path="tf_nlp_modeling/api_docs/python",
output_dir=self.workdir,
gen_report=False,
project_short_name="tf_nlp_modeling",
project_full_name="TensorFlow Modeling - NLP Library",
search_hints=True)
# Check that the "defined in" section is working
with open(os.path.join(self.workdir, "tf_nlp_modeling.md")) as f:
content = f.read()
self.assertIn("__init__.py", content)
if __name__ == "__main__":
tf.test.main()
@@ -68,3 +68,25 @@ TF Vision model garden provides a large collection of baselines and checkpoints
| backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | mask AP | download |
| ------------ |:-------------:| ---------:|-----------:|--------:|--------:|-----------:|-----------:|
| SpineNet-49 | 640x640 | 350 | 215.7 | 40.8 | 42.6 | 37.9 | config |
## Video Classification
### Common Settings and Notes
* We provide models for video classification with two backbones: [SlowOnly](https://arxiv.org/abs/1812.03982) and 3D-ResNet (R3D) used in [Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800).
* Training and evaluation details:
* All models are trained from scratch with vision modality (RGB) for 200 epochs.
* We use a batch size of 1024 and cosine learning rate decay with linear warmup in the first 5 epochs.
* We follow [SlowFast](https://arxiv.org/abs/1812.03982) to perform 30-view evaluation, as sketched below.
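A minimal sketch of 30-view evaluation (10 temporal clips x 3 spatial crops per video, with predictions averaged over the views). This assumes a generic `model` callable and is not the repository's evaluation code.

```python
import tensorflow as tf

def thirty_view_predict(model, views):
  """`views`: [30, frames, height, width, 3] crops/clips from one video."""
  logits = model(views, training=False)    # [30, num_classes]
  probs = tf.nn.softmax(logits, axis=-1)
  return tf.reduce_mean(probs, axis=0)     # single averaged prediction
```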
### Kinetics-400 Action Recognition Baselines
| model | input (frame x stride) | Top-1 | Top-5 | download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) |
| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) |
| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) |
### Kinetics-600 Action Recognition Baselines
| model | input (frame x stride) | Top-1 | Top-5 | download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) |
| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) |
# ResNet-50 ImageNet classification. 78.1% top-1 and 93.9% top-5 accuracy.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
norm_activation:
activation: 'swish'
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: ''
tfds_name: 'imagenet2012'
tfds_split: 'train'
sharding: true
is_training: true
global_batch_size: 4096
dtype: 'bfloat16'
validation_data:
input_path: ''
tfds_name: 'imagenet2012'
tfds_split: 'validation'
sharding: true
is_training: false
global_batch_size: 4096
dtype: 'bfloat16'
drop_remainder: false
trainer:
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'cosine'
cosine:
initial_learning_rate: 1.6
decay_steps: 62400
warmup:
type: 'linear'
linear:
warmup_steps: 1560
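As a sanity check on the schedule above (assuming the standard ImageNet-1k split sizes of 1,281,167 training and 50,000 validation images), the step counts follow directly from the batch size:

```python
train_images, val_images, batch = 1_281_167, 50_000, 4096
steps_per_epoch = train_images // batch   # 312 -> steps_per_loop / interval values
print(steps_per_epoch * 200)              # 62400 train_steps (200 epochs)
print(5 * steps_per_epoch)                # 1560 warmup_steps (5 epochs of linear warmup)
print(-(-val_images // batch))            # 13 validation_steps (ceiling division)
```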
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
annotation_file: '' # Can't use annotation file when tfds is used.
losses:
l2_weight_decay: 0.0001
model:
num_classes: 91
max_level: 7
min_level: 3
input_size: [640, 640, 3]
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
tfds_name: 'coco/2017'
tfds_split: 'train'
drop_remainder: true
dtype: bfloat16
global_batch_size: 256
input_path: ''
is_training: true
shuffle_buffer_size: 1000
validation_data:
tfds_name: 'coco/2017'
tfds_split: 'validation'
drop_remainder: true
dtype: bfloat16
global_batch_size: 8
input_path: ''
is_training: false
# Use your own cityscapes preprocessed dataset. 79% meanIoU.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 19
input_size: [null, null, 3]
backbone:
type: 'dilated_resnet'
dilated_resnet:
model_id: 101
output_stride: 16
stem_type: 'v1'
se_ratio: 0.25
stochastic_depth_drop_rate: 0.2
multigrid: [1, 2, 4]
last_stage_repeats: 1
decoder:
aspp:
pool_kernel_size: [512, 1024]
head:
feature_fusion: 'deeplabv3plus'
low_level: 2
low_level_num_filters: 48
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
losses:
top_k_percent_pixels: 1.0 # Backpropagate loss only for the hardest top-k fraction of pixels; 1.0 means all pixels.
train_data:
output_size: [512, 1024]
train_on_crops: true
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'train'
is_training: true
global_batch_size: 16
dtype: 'float32'
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
output_size: [1024, 2048]
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'validation'
is_training: false
global_batch_size: 16
dtype: 'float32'
drop_remainder: false
resize_eval_groundtruth: true
trainer:
optimizer_config:
learning_rate:
polynomial:
decay_steps: 90000
initial_learning_rate: 0.01
power: 0.9
type: polynomial
optimizer:
sgd:
momentum: 0.9
type: sgd
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 925
type: linear
steps_per_loop: 185
summary_interval: 185
train_steps: 90000
validation_interval: 185
validation_steps: 31
checkpoint_interval: 185
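The step counts above also line up with the Cityscapes fine-annotation split sizes (assumed here to be 2,975 training and 500 validation images) at a global batch size of 16:

```python
train_images, val_images, batch = 2975, 500, 16
steps_per_epoch = train_images // batch   # 185 -> steps_per_loop / interval values
print(5 * steps_per_epoch)                # 925 linear warmup steps (5 epochs)
print(val_images // batch)                # 31 validation_steps
```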
@@ -283,11 +283,11 @@ def generate_annotations(images, image_dir,
"""Generator for COCO annotations."""
for image in images:
object_annotation = (img_to_obj_annotation.get(image['id'], None) if
img_to_obj_annotation else None)
caption_annotation = (img_to_caption_annotation.get(image['id'], None) if
img_to_caption_annotation else None)
yield (image, image_dir, object_annotation, id_to_name_map,
caption_annotation, include_masks)
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TFDS Classification decoders."""
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
class ClassificationDecoder(decoder.Decoder):
"""A tf.Example decoder for tfds classification datasets."""
def decode(self, serialized_example):
sample_dict = {
'image/encoded':
tf.io.encode_jpeg(serialized_example['image'], quality=100),
'image/class/label':
serialized_example['label'],
}
return sample_dict
TFDS_ID_TO_DECODER_MAP = {
'cifar10': ClassificationDecoder,
'cifar100': ClassificationDecoder,
'imagenet2012': ClassificationDecoder,
}
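A small usage sketch of the decoder above; the dataset and `take(1)` call are illustrative, and running it requires the tensorflow_datasets package (and a dataset download).

```python
import tensorflow_datasets as tfds

decoder = ClassificationDecoder()
ds = tfds.load('cifar10', split='train')
for example in ds.take(1):
  decoded = decoder.decode(example)
  # decoded['image/encoded'] holds a JPEG-encoded string scalar and
  # decoded['image/class/label'] the integer class id.
  print(decoded['image/class/label'])
```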