Merge branch 'master' into move_to_keraslayers_fasterrcnn_fpn_keras_feature_extractor

0cceabfc · Yiming Shi · GitHub · 17821c0d · 39ee0ac9 · 0cceabfc
Unverified Commit 0cceabfc authored Aug 03, 2020 by Yiming Shi Committed by GitHub Aug 03, 2020
20 changed files
--- a/official/vision/detection/modeling/retinanet_model.py
+++ b/official/vision/detection/modeling/retinanet_model.py
@@ -20,12 +20,12 @@ from __future__ import print_function

 import tensorflow as tf

-from tensorflow.python.keras import backend
 from official.vision.detection.dataloader import mode_keys
 from official.vision.detection.evaluation import factory as eval_factory
 from official.vision.detection.modeling import base_model
 from official.vision.detection.modeling import losses
 from official.vision.detection.modeling.architecture import factory
+from official.vision.detection.modeling.architecture import keras_utils
 from official.vision.detection.ops import postprocess_ops


@@ -57,7 +57,7 @@ class RetinanetModel(base_model.Model):
        params.postprocess)

    self._transpose_input = params.train.transpose_input
-    assert not self._transpose_input, 'Transpose input is not supportted.'
+    assert not self._transpose_input, 'Transpose input is not supported.'
    # Input layer.
    input_shape = (
        params.retinanet_parser.output_size +
@@ -120,7 +120,7 @@ class RetinanetModel(base_model.Model):

  def build_model(self, params, mode=None):
    if self._keras_model is None:
-      with backend.get_graph().as_default():
+      with keras_utils.maybe_enter_backend_graph():
        outputs = self.model_outputs(self._input_layer, mode)

        model = tf.keras.models.Model(

--- a/official/vision/detection/modeling/shapemask_model.py
+++ b/official/vision/detection/modeling/shapemask_model.py
@@ -20,13 +20,13 @@ from __future__ import print_function

 import tensorflow as tf

-from tensorflow.python.keras import backend
 from official.vision.detection.dataloader import anchor
 from official.vision.detection.dataloader import mode_keys
 from official.vision.detection.evaluation import factory as eval_factory
 from official.vision.detection.modeling import base_model
 from official.vision.detection.modeling import losses
 from official.vision.detection.modeling.architecture import factory
+from official.vision.detection.modeling.architecture import keras_utils
 from official.vision.detection.ops import postprocess_ops
 from official.vision.detection.utils import box_utils

@@ -265,7 +265,7 @@ class ShapeMaskModel(base_model.Model):
  def build_model(self, params, mode):
    if self._keras_model is None:
      input_layers = self.build_input_layers(self._params, mode)
-      with backend.get_graph().as_default():
+      with keras_utils.maybe_enter_backend_graph():
        outputs = self.model_outputs(input_layers, mode)

        model = tf.keras.models.Model(

--- a/official/vision/image_classification/README.md
+++ b/official/vision/image_classification/README.md
@@ -119,6 +119,24 @@ python3 classifier_trainer.py \
  --params_override='runtime.num_gpus=$NUM_GPUS'
 ```

+To train on multiple hosts, each with GPUs attached, using
+[MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy),
+please update the `runtime` section in gpu.yaml
+(or override it using `--params_override`) with:
+
+```YAML
+# gpu.yaml
+runtime:
+  distribution_strategy: 'multi_worker_mirrored'
+  worker_hosts: '$HOST1:port,$HOST2:port'
+  num_gpus: $NUM_GPUS
+  task_index: 0
+```
+Set `task_index: 0` on the first host, `task_index: 1` on the second, and so
+on. `$HOST1` and `$HOST2` are the IP addresses of the hosts, and `port` can be
+any free port on the hosts. Only the first host will write TensorBoard
+Summaries and save checkpoints.
+
 #### On TPU:
 ```bash
 python3 classifier_trainer.py \

--- a/official/vision/image_classification/classifier_trainer.py
+++ b/official/vision/image_classification/classifier_trainer.py
@@ -235,9 +235,6 @@ def initialize(params: base_configs.ExperimentConfig,
  else:
    data_format = 'channels_last'
  tf.keras.backend.set_image_data_format(data_format)
-  distribution_utils.configure_cluster(
-      params.runtime.worker_hosts,
-      params.runtime.task_index)
  if params.runtime.run_eagerly:
    # Enable eager execution to allow step-by-step debugging
    tf.config.experimental_run_functions_eagerly(True)
@@ -296,6 +293,10 @@ def train_and_eval(
  """Runs the train and eval path using compile/fit."""
  logging.info('Running train and eval.')

+  distribution_utils.configure_cluster(
+      params.runtime.worker_hosts,
+      params.runtime.task_index)
+
  # Note: for TPUs, strategy and scope should be created before the dataset
  strategy = strategy_override or distribution_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
@@ -338,7 +339,8 @@ def train_and_eval(
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=params.model.optimizer.name,
        base_learning_rate=learning_rate,
-        params=params.model.optimizer.as_dict())
+        params=params.model.optimizer.as_dict(),
+        model=model)

    metrics_map = _get_metrics(one_hot)
    metrics = [metrics_map[metric] for metric in params.train.metrics]

--- a/official/vision/image_classification/optimizer_factory.py
+++ b/official/vision/image_classification/optimizer_factory.py
@@ -18,11 +18,12 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function

+from typing import Any, Dict, Text, List
+
 from absl import logging
 import tensorflow as tf
 import tensorflow_addons as tfa

-from typing import Any, Dict, Text, List
 from official.vision.image_classification import learning_rate
 from official.vision.image_classification.configs import base_configs

@@ -250,7 +251,8 @@ class MovingAverage(tf.keras.optimizers.Optimizer):
 def build_optimizer(
    optimizer_name: Text,
    base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
-    params: Dict[Text, Any]):
+    params: Dict[Text, Any],
+    model: tf.keras.Model = None):
  """Build the optimizer based on name.

  Args:
@@ -261,6 +263,8 @@ def build_optimizer(
    params: String -> Any dictionary representing the optimizer params.
      This should contain optimizer specific parameters such as
      `base_learning_rate`, `decay`, etc.
+    model: The `tf.keras.Model`. This is used for the shadow copy if using
+      `MovingAverage`.

  Returns:
    A tf.keras.Optimizer.
@@ -322,10 +326,13 @@ def build_optimizer(
  # Moving average should be applied last, as it's applied at test time
  moving_average_decay = params.get('moving_average_decay', 0.)
  if moving_average_decay is not None and moving_average_decay > 0.:
+    if model is None:
+      raise ValueError('`model` must be provided if using `MovingAverage`.')
    logging.info('Including moving average decay.')
    optimizer = MovingAverage(
-        optimizer,
+        optimizer=optimizer,
        average_decay=moving_average_decay)
+    optimizer.shadow_copy(model)
  return optimizer



--- a/official/vision/image_classification/optimizer_factory_test.py
+++ b/official/vision/image_classification/optimizer_factory_test.py
@@ -19,15 +19,21 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function

-import tensorflow as tf
-
 from absl.testing import parameterized
+
+import tensorflow as tf
 from official.vision.image_classification import optimizer_factory
 from official.vision.image_classification.configs import base_configs


 class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):

+  def build_toy_model(self) -> tf.keras.Model:
+    """Creates a toy `tf.Keras.Model`."""
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
+    return model
+
  @parameterized.named_parameters(
      ('sgd', 'sgd', 0., False),
      ('momentum', 'momentum', 0., False),
@@ -40,6 +46,7 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
      ('rmsprop_ema', 'rmsprop', 0.999, False))
  def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
    """Smoke test to be sure no syntax errors."""
+    model = self.build_toy_model()
    params = {
        'learning_rate': 0.001,
        'rho': 0.09,
@@ -51,7 +58,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=optimizer_name,
        base_learning_rate=params['learning_rate'],
-        params=params)
+        params=params,
+        model=model)
    self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))

  def test_unknown_optimizer(self):

--- a/official/vision/image_classification/resnet/common.py
+++ b/official/vision/image_classification/resnet/common.py
@@ -255,7 +255,7 @@ def define_keras_flags(
      name='tpu', default='', help='TPU address to connect to.')
  flags.DEFINE_integer(
      name='steps_per_loop',
-      default=500,
+      default=None,
      help='Number of steps per training loop. Only training step happens '
      'inside the loop. Callbacks will not be called inside. Will be capped at '
      'steps per epoch.')

--- a/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py
+++ b/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py
@@ -14,18 +14,16 @@
 # ==============================================================================
 """Runs a ResNet model on the ImageNet dataset using custom training loops."""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import math
+import os
+
 from absl import app
 from absl import flags
 from absl import logging
+import orbit
 import tensorflow as tf

 from official.modeling import performance
-from official.staging.training import controller
 from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
@@ -87,15 +85,6 @@ def get_num_train_iterations(flags_obj):
  return train_steps, train_epochs, eval_steps


-def _steps_to_run(steps_in_current_epoch, steps_per_epoch, steps_per_loop):
-  """Calculates steps to run on device."""
-  if steps_per_loop <= 0:
-    raise ValueError('steps_per_loop should be positive integer.')
-  if steps_per_loop == 1:
-    return steps_per_loop
-  return min(steps_per_loop, steps_per_epoch - steps_in_current_epoch)
-
-
 def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using custom training loops.

@@ -121,7 +110,6 @@ def run(flags_obj):
          datasets_num_private_threads=flags_obj.datasets_num_private_threads)
    common.set_cudnn_batchnorm_mode()

-  # TODO(anj-s): Set data_format without using Keras.
  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
@@ -137,7 +125,14 @@ def run(flags_obj):

  per_epoch_steps, train_epochs, eval_steps = get_num_train_iterations(
      flags_obj)
-  steps_per_loop = min(flags_obj.steps_per_loop, per_epoch_steps)
+  if flags_obj.steps_per_loop is None:
+    steps_per_loop = per_epoch_steps
+  elif flags_obj.steps_per_loop > per_epoch_steps:
+    steps_per_loop = per_epoch_steps
+    logging.warn('Setting steps_per_loop to %d to respect epoch boundary.',
+                 steps_per_loop)
+  else:
+    steps_per_loop = flags_obj.steps_per_loop

  logging.info(
      'Training %d epochs, each epoch has %d steps, '
@@ -154,8 +149,8 @@ def run(flags_obj):

  eval_interval = flags_obj.epochs_between_evals * per_epoch_steps
  checkpoint_interval = (
-      per_epoch_steps if flags_obj.enable_checkpoint_and_export else None)
-  summary_interval = per_epoch_steps if flags_obj.enable_tensorboard else None
+      steps_per_loop * 5 if flags_obj.enable_checkpoint_and_export else None)
+  summary_interval = steps_per_loop if flags_obj.enable_tensorboard else None

  checkpoint_manager = tf.train.CheckpointManager(
      runnable.checkpoint,
@@ -164,20 +159,24 @@ def run(flags_obj):
      step_counter=runnable.global_step,
      checkpoint_interval=checkpoint_interval)

-  resnet_controller = controller.Controller(
+  resnet_controller = orbit.Controller(
      strategy,
-      runnable.train,
-      runnable.evaluate if not flags_obj.skip_eval else None,
+      runnable,
+      runnable if not flags_obj.skip_eval else None,
      global_step=runnable.global_step,
      steps_per_loop=steps_per_loop,
-      train_steps=per_epoch_steps * train_epochs,
      checkpoint_manager=checkpoint_manager,
      summary_interval=summary_interval,
-      eval_steps=eval_steps,
-      eval_interval=eval_interval)
+      eval_summary_dir=os.path.join(flags_obj.model_dir, 'eval'))

  time_callback.on_train_begin()
-  resnet_controller.train(evaluate=not flags_obj.skip_eval)
+  if not flags_obj.skip_eval:
+    resnet_controller.train_and_evaluate(
+        train_steps=per_epoch_steps * train_epochs,
+        eval_steps=eval_steps,
+        eval_interval=eval_interval)
+  else:
+    resnet_controller.train(steps=per_epoch_steps * train_epochs)
  time_callback.on_train_end()

  stats = build_stats(runnable, time_callback)

--- a/official/vision/image_classification/resnet/resnet_runnable.py
+++ b/official/vision/image_classification/resnet/resnet_runnable.py
@@ -14,33 +14,21 @@
 # ==============================================================================
 """Runs a ResNet model on the ImageNet dataset using custom training loops."""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
+import orbit
 import tensorflow as tf

 from official.modeling import performance
 from official.staging.training import grad_utils
-from official.staging.training import standard_runnable
-from official.staging.training import utils
 from official.utils.flags import core as flags_core
 from official.vision.image_classification.resnet import common
 from official.vision.image_classification.resnet import imagenet_preprocessing
 from official.vision.image_classification.resnet import resnet_model


-class ResnetRunnable(standard_runnable.StandardTrainable,
-                     standard_runnable.StandardEvaluable):
+class ResnetRunnable(orbit.StandardTrainer, orbit.StandardEvaluator):
  """Implements the training and evaluation APIs for Resnet model."""

  def __init__(self, flags_obj, time_callback, epoch_steps):
-    standard_runnable.StandardTrainable.__init__(self,
-                                                 flags_obj.use_tf_while_loop,
-                                                 flags_obj.use_tf_function)
-    standard_runnable.StandardEvaluable.__init__(self,
-                                                 flags_obj.use_tf_function)
-
    self.strategy = tf.distribute.get_strategy()
    self.flags_obj = flags_obj
    self.dtype = flags_core.get_tf_dtype(flags_obj)
@@ -107,11 +95,8 @@ class ResnetRunnable(standard_runnable.StandardTrainable,

    # Handling epochs.
    self.epoch_steps = epoch_steps
-    self.epoch_helper = utils.EpochHelper(epoch_steps, self.global_step)
-
-  def build_train_dataset(self):
-    """See base class."""
-    return utils.make_distributed_dataset(
+    self.epoch_helper = orbit.utils.EpochHelper(epoch_steps, self.global_step)
+    train_dataset = orbit.utils.make_distributed_dataset(
        self.strategy,
        self.input_fn,
        is_training=True,
@@ -122,10 +107,11 @@ class ResnetRunnable(standard_runnable.StandardTrainable,
        .datasets_num_private_threads,
        dtype=self.dtype,
        drop_remainder=True)
-
-  def build_eval_dataset(self):
-    """See base class."""
-    return utils.make_distributed_dataset(
+    orbit.StandardTrainer.__init__(self, train_dataset,
+                                   flags_obj.use_tf_while_loop,
+                                   flags_obj.use_tf_function)
+    if not flags_obj.skip_eval:
+      eval_dataset = orbit.utils.make_distributed_dataset(
          self.strategy,
          self.input_fn,
          is_training=False,
@@ -133,6 +119,8 @@ class ResnetRunnable(standard_runnable.StandardTrainable,
          batch_size=self.batch_size,
          parse_record_fn=imagenet_preprocessing.parse_record,
          dtype=self.dtype)
+      orbit.StandardEvaluator.__init__(self, eval_dataset,
+                                       flags_obj.use_tf_function)

  def train_loop_begin(self):
    """See base class."""

--- a/research/keypointnet/LICENSE
+++ b/research/keypointnet/LICENSE
--- a/orbit/README.md
+++ b/orbit/README.md
+![TensorFlow Requirement: 2.x](https://img.shields.io/badge/TensorFlow%20Requirement-2.x-brightgreen)
+
+# Orbit
+
+Orbit is a customized training loop library built on top of TensorFlow 2. It
+provides a flexible lightweight library that users can easily use or fork when
+writing [customized training loop code](https://www.tensorflow.org/tutorials/distribute/custom_training)
+in TF2. It integrates with `tf.distribute` seamlessly and supports running on
+different device types (CPU, GPU, and TPU).
--- a/research/learning_unsupervised_learning/meta_objective/__init__.py
+++ b/research/learning_unsupervised_learning/meta_objective/__init__.py
-# Copyright 2018 Google, Inc. All Rights Reserved.
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""Orbit package definition."""

-
-import sklearn
-import linear_regression
+from orbit import utils
+from orbit.controller import Controller
+from orbit.runner import *
+from orbit.standard_runner import *
--- a/official/staging/training/controller.py
+++ b/official/staging/training/controller.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,51 +14,47 @@
 # ==============================================================================
 """A light weight utilities to train TF2 models."""

-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
-
 import time
-
+from typing import Callable, Dict, Optional, Text, Union
 from absl import logging
+import numpy as np
+from orbit import runner
+from orbit import utils

-import tensorflow.compat.v2 as tf
-from typing import Callable, Dict, Optional, Text
+import tensorflow as tf

-from official.staging.training import utils
+
+def _log_info(message: Text):
+  """Logs `message` to the `info` log, and also prints to stdout."""
+  logging.info(message)
+  print(message)


-class Controller(object):
+class Controller:
  """Class that facilitates training and evaluation of models."""

  def __init__(
      self,
      strategy: Optional[tf.distribute.Strategy] = None,
-      train_fn: Optional[Callable[[tf.Tensor],
-                                  Optional[Dict[Text, tf.Tensor]]]] = None,
-      eval_fn: Optional[Callable[[tf.Tensor],
-                                 Optional[Dict[Text, tf.Tensor]]]] = None,
+      trainer: Optional[runner.AbstractTrainer] = None,
+      evaluator: Optional[runner.AbstractEvaluator] = None,
      global_step: Optional[tf.Variable] = None,
      # Train related
-      train_steps: Optional[int] = None,
      steps_per_loop: Optional[int] = None,
-      summary_dir: Optional[Text] = None,
      checkpoint_manager: Optional[tf.train.CheckpointManager] = None,
-      # summary related
+      # Summary related
      summary_interval: Optional[int] = None,
+      summary_dir: Optional[Text] = None,
      # Evaluation related
-      eval_summary_dir: Optional[Text] = None,
-      eval_steps: Optional[int] = None,
-      eval_interval: Optional[int] = None):
+      eval_summary_dir: Optional[Text] = None):
    """Constructs a `Controller` instance.

    Args:
      strategy: An instance of `tf.distribute.Strategy`.
-      train_fn: A callable defined as `def train_fn(num_steps)`, which
-        `num_steps` indicates the number of steps to run for each loop.
-      eval_fn: A callable defined as `def eval_fn(num_steps)`, which `num_steps`
-        indicates the number of steps for one evaluation.
+      trainer: An instance of `orbit.AbstractTrainer`, which represents model
+        training details.
+      evaluator: An instance of `orbit.AbstractEvaluator`, which represents
+        model evaluation details.
      global_step: An integer `tf.Variable` indicating the global training step
        number. Usually this can be obtained from `iterations` property of the
        model's optimizer (e.g. `self.optimizer.iterations`), or users can
@@ -66,105 +62,166 @@ class Controller(object):
        own global step variable, it is recommended to create the `tf.Variable`
        inside strategy scope, and with
        `aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA`.
-      train_steps: The total (maximum) number of training steps to perform.
      steps_per_loop: The number of steps to run in each "inner loop" of
-        training (passed to the `num_steps` parameter of `train_fn`).
-      summary_dir: The directory to restore and write checkpoints and summaries.
-        If None, it will be set to `checkpoint_manager.directory`.
+        training (passed to the `num_steps` parameter of `trainer.train`).
      checkpoint_manager: An instance of `tf.train.CheckpointManager`.
      summary_interval: Step interval for training summaries. Note that this
-        argument only applies to the summaries outside the training loop. If the
-        value is None, then training summaries are not enabled.
+        argument only applies to the summaries inside `trainer.train` function.
+        Summaries outside like "steps_per_second" and outputs from
+        `trainer.train` function will always be enabled. If set, the value
+        should be divisible by steps_per_loop.
+      summary_dir: The directory to restore and write checkpoints and summaries.
+        If None, it will be set to `checkpoint_manager.directory`.
      eval_summary_dir: The directory to write eval summaries. If None, it will
        be set to `summary_dir`.
-      eval_steps: Number of steps to run evaluation.
-      eval_interval: Step interval for evaluation. If None, will skip evaluation
-        in the middle of training. Note that evaluation only happens outside the
-        training loop, which the loop iteration is specify by `steps_per_loop`
-        parameter.

    Raises:
-      ValueError: If both `train_fn` and `eval_fn` are None.
-      ValueError: If `train_fn` is not None and `train_steps` is None.
-      ValueError: If `steps_per_loop` is None when `train_fn` is provided.
+      ValueError: If both `trainer` and `evaluator` are None.
      ValueError: If `steps_per_loop` is not a positive integer.
+      ValueError: If `summary_interval` is not a positive integer or it cannot
+        be divisible by `steps_per_loop`.
    """
-    if train_fn is None and eval_fn is None:
-      raise ValueError("`train_fn` and `eval_fn` should not both be None")
-
-    # TODO(rxsang): Support training until exhaustion by passing
-    # `train_steps=-1`. Currently it cannot be supported with a host training
-    # loop because break statements are not supported with distributed dataset.
-    if train_fn is not None:
-      if train_steps is None:
-        raise ValueError("`train_steps` is required when `train_fn` is "
-                         "provided.")
+    if trainer is None and evaluator is None:
+      raise ValueError("`trainer` and `evaluator` should not both be None")
+
+    if trainer is not None:
      if steps_per_loop is None:
-        raise ValueError("`steps_per_loop` is required when `train_fn is "
+        raise ValueError("`steps_per_loop` is required when `trainer` is "
                         "provided.")
+
      if not isinstance(steps_per_loop, int) or steps_per_loop < 1:
        raise ValueError("`steps_per_loop` should be a positive integer")
-    if summary_interval is not None and summary_interval <= 0:
+
+      if summary_interval is not None:
+        if summary_interval <= 0:
          raise ValueError("`summary_interval` should be larger than 0")
+        if summary_interval % steps_per_loop != 0:
+          raise ValueError("The summary interval ({}) must be a multiple "
+                           "of the steps_per_loop ({})".format(
+                               summary_interval, steps_per_loop))
+
+    self.trainer = trainer
+    self.evaluator = evaluator

    self.strategy = strategy or tf.distribute.get_strategy()

-    self.train_fn = train_fn
-    self.eval_fn = eval_fn
    self.global_step = global_step
    self.checkpoint_manager = checkpoint_manager

-    if self.train_fn is not None:
-      self.train_steps = train_steps
-      self.steps_per_loop = steps_per_loop
-      if summary_dir:
-        self.summary_dir = summary_dir
-      elif checkpoint_manager:
-        self.summary_dir = checkpoint_manager.directory
-      else:
-        self.summary_dir = None
+    if summary_dir is None and checkpoint_manager:
+      summary_dir = checkpoint_manager.directory

+    if self.trainer is not None:
+      self.step_timer = None
+      self.steps_per_loop = steps_per_loop
      self.summary_interval = summary_interval
-      if self.summary_dir and self.summary_interval:
-        summary_writer = tf.summary.create_file_writer(self.summary_dir)
-      else:
-        summary_writer = None
-      # TODO(rxsang): Consider pass SummaryManager directly into Controller for
-      # maximum customizability.
      self.summary_manager = utils.SummaryManager(
-          summary_writer,
-          tf.summary.scalar,
-          global_step=self.global_step,
-          summary_interval=self.summary_interval)
-
-    if self.eval_fn is not None:
-      eval_summary_dir = eval_summary_dir or self.summary_dir
-      eval_summary_writer = tf.summary.create_file_writer(
-          eval_summary_dir) if eval_summary_dir else None
+          summary_dir, tf.summary.scalar, global_step=self.global_step)
+
+    eval_summary_writer = None
+    if self.evaluator is not None:
+      eval_summary_dir = eval_summary_dir or summary_dir
+      if eval_summary_dir == summary_dir and self.trainer is not None:
+        # Reuse the summary writer if train and evaluation summary directory
+        # are the same.
+        self.eval_summary_manager = self.summary_manager
+      else:
        self.eval_summary_manager = utils.SummaryManager(
-          eval_summary_writer, tf.summary.scalar, global_step=self.global_step)
+            eval_summary_dir, tf.summary.scalar, global_step=self.global_step)

-      self.eval_steps = eval_steps
-      self.eval_interval = eval_interval
-
-      # Creates and initializes the interval triggers.
-      self.eval_trigger = utils.IntervalTrigger(self.eval_interval,
-                                                self.global_step.numpy())  # pytype: disable=attribute-error
-
-    if self.global_step:
+    if self.global_step is not None:
      tf.summary.experimental.set_step(self.global_step)

    # Restores the model if needed.
+    # TODO(momernick): We probably only want to do this on certain occasions?
    if self.checkpoint_manager is not None:
-      model_restored = self._restore_model()
-      if not model_restored and self.checkpoint_manager.checkpoint_interval:
-        # If the model is not restored from a checkpoint, save an initial
+      checkpoint_interval = self.checkpoint_manager.checkpoint_interval
+      model_restored = self.restore_checkpoint()
+      if not model_restored and (checkpoint_interval and
+                                 self.trainer is not None):
+        # If the model is not restored from a checkpoint, and
+        # `checkpoint_interval` is enabled for training, save an initial
        # checkpoint.
-        ckpt_path = self.checkpoint_manager.save(
-            checkpoint_number=self.global_step)
-        logging.info("Saved checkpoins in %s", ckpt_path)
+        self.save_checkpoint()
+
+  def train(self, steps: int, checkpoint_at_completion: bool = True):
+    """Runs training.
+
+    This method calls the `train` method on the Trainable object until the
+    global step count is equal to `steps`. It will optionally save checkpoints,
+    if a CheckpointManager was passed to the Controller instance's `__init__`.
+
+    Args:
+      steps: The global step count to train up to.
+      checkpoint_at_completion: Whether to save a checkpoint when this method
+        returns. Defaults to True (write the checkpoint). This is always
+        triggered, regardless of the checkpointing interval.
+    """
+    if self.trainer is None:
+      raise ValueError("`self.trainer` is required when calling `train` "
+                       "method.")
+    if self.global_step is None:
+      raise ValueError("`self.global_step` is required when calling `train` "
+                       "method.")
+
+    # TODO(momernick): Support steps=None or -1 (training to exhaustion).
+    current_step = self.global_step.numpy()  # This is an expensive access.
+    while current_step < steps:
+      logging.info("Train at step %s of %s", current_step, steps)
+      # Calculates steps to run for the next train loop.
+      num_steps = min(steps - current_step, self.steps_per_loop)
+      self._train_n_steps(num_steps)
+      self._maybe_save_checkpoint()
+      current_step = self.global_step.numpy()  # This is an expensive access.
+
+    if checkpoint_at_completion:
+      self.save_checkpoint()

-  def _restore_model(self, checkpoint_path=None):
+  def evaluate(self, steps: int = None) -> Optional[Dict[Text, np.number]]:
+    """Runs evaluation.
+
+    This method calls the `evaluate` method on the Evaluator object for `steps`
+    steps, then writes the returned summaries (if any).
+
+    Args:
+      steps: The number of steps to evaluate for.
+
+    Returns:
+      The evaluation results as a dictionary of numpy values.
+
+    Raises:
+      ValueError: If no checkpoint found in `self.checkpoint_manager.directory`.
+      ValueError: If `evaluator` is not provided.
+    """
+    if self.evaluator is None:
+      raise ValueError("`evaluator` must be provided to call `evaluate()` "
+                       "method.")
+
+    steps = steps or -1
+    current_step = self.global_step.numpy()
+    if steps > 0:
+      logging.info("Running %s steps of evaluation at train step: %s", steps,
+                   current_step)
+      steps = tf.convert_to_tensor(steps, dtype=tf.int32)
+    else:
+      logging.info("Evaluating at train step: %s", current_step)
+
+    with self.eval_summary_manager.summary_writer().as_default():
+      eval_outputs = self.evaluator.evaluate(steps)
+
+    if eval_outputs:
+      eval_outputs = tf.nest.map_structure(utils.get_value, eval_outputs)
+
+    info = "step: {}        evaluation metric: {}".format(
+        current_step, eval_outputs)
+    _log_info(info)
+
+    self.eval_summary_manager.write_summaries(eval_outputs)
+    self.eval_summary_manager.flush()
+
+    return eval_outputs
+
+  def restore_checkpoint(self, checkpoint_path: Text = None):
    """Restore or initialize the model.

    Args:
@@ -172,153 +229,164 @@ class Controller(object):
        restore. If None, will restore from `self.checkpoint_manager`.

    Returns:
-      True if the latest checkpoint is found or restored. Otherwise False.
+      The path to the restored checkpoint if a restore happened, or None
+        if no restore occurred.
    """
    with self.strategy.scope():
      # Checkpoint restoring should be inside scope. b/139450638
      if checkpoint_path is not None:
        self.checkpoint_manager.checkpoint.restore(checkpoint_path)
-        return True
+        return checkpoint_path
      return self.checkpoint_manager.restore_or_initialize()

-  def _evaluate_once(self, current_step):
-    """Runs the evaluation once."""
-    logging.info("Start evaluation at step: %s", current_step)
+  def save_checkpoint(self):
+    """Checkpoint the model.

-    with self.eval_summary_manager.summary_writer.as_default():
-      eval_outputs = self.eval_fn(self.eval_steps)
+    This method will write a checkpoint containing the current state of the
+    model.

-    if eval_outputs:
-      eval_outputs = tf.nest.map_structure(lambda x: x.numpy(), eval_outputs)
+    Raises:
+      ValueError: if no CheckpointManager was provided to this Controller's
+        init args.
+    """
+    self._maybe_save_checkpoint(force_trigger=True)

-    info = "step: {}        evaluation metric: {}".format(
-        current_step, eval_outputs)
-    self._log_info(info)
+  def train_and_evaluate(self,
+                         train_steps: int = None,
+                         eval_steps: int = None,
+                         eval_interval: int = None):
+    """Train and evaluate in an interleaved manner.

-    self.eval_summary_manager.write_summaries(eval_outputs)
-    self.eval_summary_manager.flush()
+    This method will train the model until the global step count equals
+    `train_steps`, running an evaluation for `eval_steps` every `eval_interval`
+    training steps. In addition, this method will run a final evaluation at the
+    end of the training sequence.

-  def _maybe_save_checkpoints(self, current_step, force_trigger=False):
-    if self.checkpoint_manager and self.checkpoint_manager.checkpoint_interval:
-      ckpt_path = self.checkpoint_manager.save(
-          checkpoint_number=current_step, check_interval=not force_trigger)
-      if ckpt_path is not None:
-        logging.info("Saved checkpoins in %s", ckpt_path)
+    Args:
+      train_steps: The global step count to train up to.
+      eval_steps: The number of steps to run during an evaluation. If None,
+        this method will evaluate over the entire evaluation dataset.
+      eval_interval: The number of training steps to run between evaluations.
+        If set, training will always stop every `eval_interval` steps, even if
+        this results in a shorter inner loop than specified by `steps_per_loop`
+        setting. If None, evaluation will only be performed after training is
+        complete.

-  def _maybe_evaluate(self, current_step, force_trigger=False):
-    if self.eval_trigger(current_step, force_trigger):
-      self._evaluate_once(current_step)
+    Raises:
+      ValueError: If eval_interval is not a multiple of self.steps_per_loop.
+    """
+    current_step = self.global_step.numpy()  # This is an expensive access.
+    eval_interval = eval_interval or (train_steps - current_step)
+    while current_step < train_steps:
+      interval = min(train_steps - current_step, eval_interval)
+      num_steps = current_step + interval
+      self.train(steps=num_steps, checkpoint_at_completion=False)
+      self.evaluate(steps=eval_steps)
+      current_step = self.global_step.numpy()  # This is an expensive access.
+    self.save_checkpoint()
+
+  def evaluate_continuously(self,
+                            steps: int = None,
+                            timeout: Optional[Union[int, float]] = None,
+                            timeout_fn: Optional[Callable[[], bool]] = None):
+    """Monitor a directory and evaluate on checkpoints in it.
+
+    This method continuously monitors a directory as specified by this
+    Controller's CheckpointManager init arg and runs evaluation on the
+    checkpoints found there.

-  def _log_info(self, message):
-    """Logs `message` to the `info` log, and also prints to stdout."""
-    logging.info(message)
-    print(message)
+    Args:
+      steps: The number of steps to run when evaluating.
+      timeout: The maximum number of seconds to wait between checkpoints. See
+        tf.train.checkpoints_iterator documentation.
+      timeout_fn: Optional callable to call after a timeout. If the function
+        returns True, then it means that no new checkpoints will be generated
+        and the iterator will exit.

-  def train(self, evaluate=True):
-    """Runs the training, with optional evaluation.
+    Raises:
+      ValueError: If no checkpoint found in `self.checkpoint_manager.directory`.
+      ValueError: If `evaluator` was not provided as a controller init arg.
+
+    """
+    for checkpoint_path in tf.train.checkpoints_iterator(
+        self.checkpoint_manager.directory,
+        timeout=timeout,
+        timeout_fn=timeout_fn):
+      self.restore_checkpoint(checkpoint_path)
+      self.evaluate(steps)

-    This handles evaluation, gathering summaries, and saving checkpoints.
+  def _train_n_steps(self, num_steps: int):
+    """Run training for `num_steps`.
+
+    It will also write training outputs to summaries if there is any.

    Args:
-      evaluate: A boolean indicates whether to perform evaluation during
-        training.
+      num_steps: An integer indicates how many steps to run for this training
+        loop.

    Raises:
-      RuntimeError: If `global_step` is not updated correctly in `train_fn`.
+      RuntimeError: If `global_step` is not updated correctly in
+        `trainer.train`.
    """
-    if self.train_fn is None:
-      raise ValueError("`self.train_fn` is required when calling `train` "
-                       "method.")
-    if self.global_step is None:
-      raise ValueError("`self.global_step` is required when calling `train` "
-                       "method.")
-    if evaluate and self.eval_fn is None:
-      raise ValueError("`self.eval_fn` is required when calling `train` method "
-                       "with `evaluate=True`")
+    if not self.step_timer:
+      self.step_timer = StepTimer(self.global_step)

-    step_timer = _StepTimer(self.global_step)
-    current_step = self.global_step.numpy()
-    logging.info("Train at step %s of %s", current_step, self.train_steps)
-    while current_step < self.train_steps:
    # Calculates steps to run for the next train loop.
-      steps_per_loop = min(self.train_steps - current_step, self.steps_per_loop)
-      logging.info("Entering training loop with %s steps, at step %s of %s",
-                   steps_per_loop, current_step, self.train_steps)
-      current_step += steps_per_loop
-      steps_per_loop = tf.convert_to_tensor(steps_per_loop, dtype=tf.int32)
-
-      with self.summary_manager.summary_writer.as_default():
-        train_outputs = self.train_fn(steps_per_loop)
+    current_step = self.global_step.numpy()
+    logging.info("Entering training loop at step %s to run %s steps",
+                 current_step, num_steps)
+    current_step += num_steps
+    num_steps = tf.convert_to_tensor(num_steps, dtype=tf.int32)
+
+    with self.summary_manager.summary_writer().as_default():
+      # Create a lambda that returns true when summaries should be written.
+      should_record = False  # Allows static optimization in no-summary cases.
+      if self.summary_interval:
+        should_record = lambda: (self.global_step % self.summary_interval == 0)
+      with tf.summary.record_if(should_record):
+        train_outputs = self.trainer.train(num_steps)

    # Updates and verifies the current step after a training loop finishes.
    if current_step != self.global_step.numpy():
-        raise RuntimeError("`self.train_fn` is not updating `global_step` "
-                           "correctly, expected: %s, actual: %s" %
+      raise RuntimeError("`trainer.train` function is not updating "
+                         "`global_step` correctly, expected: %s, actual: %s" %
                         (current_step, self.global_step.numpy()))

    # Print information like metrics and steps_per_second after a training
    # loop.
    if train_outputs:
-        train_outputs = tf.nest.map_structure(
-            lambda x: x.numpy(), train_outputs)
-      steps_per_second = step_timer.steps_per_second()
+      train_outputs = tf.nest.map_structure(utils.get_value, train_outputs)
+
+    train_outputs = train_outputs or {}
+    steps_per_second = self.step_timer.steps_per_second()
    info = "step: {}        steps_per_second: {:.2f}        {}".format(
        current_step, steps_per_second, train_outputs)
-      self._log_info(info)
+    _log_info(info)

-      train_outputs = train_outputs or {}
    train_outputs["steps_per_second"] = steps_per_second
    self.summary_manager.write_summaries(train_outputs)

-      self._maybe_save_checkpoints(current_step)
-
-      if evaluate:
-        self._maybe_evaluate(current_step)
-
-    self.summary_manager.write_summaries(train_outputs, always_write=True)
-    self.summary_manager.flush()
-    self._maybe_save_checkpoints(current_step, force_trigger=True)
-    if evaluate:
-      self._maybe_evaluate(current_step, force_trigger=True)
-
-  def evaluate(self, continuous=False, timeout_fn=None):
-    """Runs the evaluation.
+  def _maybe_save_checkpoint(self, force_trigger: bool = False):
+    """Save checkpoints if necessary.

    Args:
-      continuous: If `True`, will continously monitor the checkpoint directory
-        to evaluate on the latest checkpoint. If `False`, will do the evaluation
-        once.
-      timeout_fn: Optional callable to call after a timeout. If the function
-        returns True, then it means that no new checkpoints will be generated
-        and the iterator will exit.
+      force_trigger: A boolean indicates whether to force saving checkpoints
+        regardless of the checkpoint interval.

-    Raises:
-      ValueError: If no checkpoint found in `self.checkpoint_manager.directory`.
+    Returns:
+      A boolean indicating whether a checkpoint was saved.
    """
-    if self.eval_fn is None:
-      raise ValueError("`self.eval_fn` should not be None to call "
-                       "`evaluate()` method.")
-
-    if not continuous and timeout_fn is not None:
-      raise ValueError("`timeout_fn` can be only passed when `continuous` is "
-                       "True")
-
-    if continuous:
-      for checkpoint_path in tf.train.checkpoints_iterator(
-          self.checkpoint_manager.directory, timeout_fn=timeout_fn):
-        self._restore_model(checkpoint_path)
-        self._evaluate_once(self.global_step.numpy())
-      return
-
-    latest_checkpoint = self.checkpoint_manager.latest_checkpoint
-    if not latest_checkpoint:
-      raise ValueError("no checkpoint found in dir %s" %
-                       self.checkpoint_manager.directory)
-    self._restore_model()
-    self._evaluate_once(self.global_step.numpy())
+    if self.checkpoint_manager and self.checkpoint_manager.checkpoint_interval:
+      ckpt_path = self.checkpoint_manager.save(
+          checkpoint_number=self.global_step.numpy(),
+          check_interval=not force_trigger)
+      if ckpt_path is not None:
+        logging.info("Saved checkpoints in %s", ckpt_path)
+        return True
+    return False


-class _StepTimer(object):
+class StepTimer:
  """Utility class for measuring steps/second."""

  def __init__(self, step):

--- a/orbit/controller_test.py
+++ b/orbit/controller_test.py
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for orbit.controller."""
+
+import os
+from absl import logging
+from absl.testing import parameterized
+import numpy as np
+from orbit import controller
+from orbit import standard_runner
+
+import tensorflow as tf
+
+
+def create_model():
+  """Builds a tiny Keras model: a 3-feature input into a 4-unit Dense layer."""
+  inputs = tf.keras.layers.Input(shape=(3,), name="input")
+  outputs = tf.keras.layers.Dense(4, name="dense")(inputs)
+  return tf.keras.Model(inputs, outputs)
+
+
+def summaries_with_matching_keyword(keyword, summary_dir):
+  """Returns summary protos matching given keyword from event file.
+
+  Only the last path returned by globbing `events*` inside `summary_dir` is
+  read.
+
+  Args:
+    keyword: Substring looked for in each summary value's tag.
+    summary_dir: Directory that holds the event files.
+
+  Returns:
+    A list of `event.summary` protos; a summary is appended once for every
+    value whose tag contains `keyword`. The list is empty when no event file
+    is found.
+  """
+  event_paths = tf.io.gfile.glob(os.path.join(summary_dir, "events*"))
+  if not event_paths:
+    # Nothing has been written yet: report "no matches" rather than failing
+    # with an IndexError on `event_paths[-1]`, so callers' emptiness
+    # assertions produce a meaningful failure.
+    return []
+
+  matches = []
+  for event in tf.compat.v1.train.summary_iterator(event_paths[-1]):
+    if event.summary is not None:
+      for value in event.summary.value:
+        if keyword in value.tag:
+          matches.append(event.summary)
+  return matches
+
+
+def dataset_fn(ctx):
+  """Returns batches of 10 (zeros, ones) examples; `ctx` is ignored."""
+  del ctx
+  features = np.zeros((10, 3), dtype=np.float32)
+  labels = np.ones((10, 4), dtype=np.float32)
+  return (tf.data.Dataset.from_tensor_slices((features, labels))
+          .repeat(100)
+          .batch(10, drop_remainder=True))
+
+
+class TestRunner(standard_runner.StandardTrainer,
+                 standard_runner.StandardEvaluator):
+  """Implements the training and evaluation APIs for the test model.
+
+  One object serves as both trainer and evaluator: it owns the model, the
+  optimizer, and one `Mean` metric each for the training and evaluation loss.
+  """
+
+  def __init__(self, return_numpy=False):
+    """Builds the model, optimizer, metrics and distributed datasets.
+
+    Args:
+      return_numpy: If True, `train_loop_end` and `eval_end` report the loss
+        through `.numpy()`; otherwise the metric result is returned as-is.
+    """
+    self.strategy = tf.distribute.get_strategy()
+    self.model = create_model()
+    self.optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)
+    # The optimizer's iteration counter is reused as the global step.
+    self.global_step = self.optimizer.iterations
+    self.train_loss = tf.keras.metrics.Mean("train_loss", dtype=tf.float32)
+    self.eval_loss = tf.keras.metrics.Mean("eval_loss", dtype=tf.float32)
+    self.return_numpy = return_numpy
+    train_dataset = (
+        self.strategy.experimental_distribute_datasets_from_function(dataset_fn)
+    )
+    eval_dataset = (
+        self.strategy.experimental_distribute_datasets_from_function(dataset_fn)
+    )
+    # Both base-class initializers are invoked explicitly, each with its own
+    # dataset.
+    standard_runner.StandardTrainer.__init__(self, train_dataset)
+    standard_runner.StandardEvaluator.__init__(self, eval_dataset)
+
+  def train_step(self, iterator):
+    """Runs one replicated training step on the next element of `iterator`."""
+
+    def _replicated_step(inputs):
+      """Replicated training step."""
+      inputs, targets = inputs
+      with tf.GradientTape() as tape:
+        outputs = self.model(inputs)
+        loss = tf.reduce_mean(tf.keras.losses.MSE(targets, outputs))
+      grads = tape.gradient(loss, self.model.variables)
+      self.optimizer.apply_gradients(zip(grads, self.model.variables))
+      self.train_loss.update_state(loss)
+
+    self.strategy.run(_replicated_step, args=(next(iterator),))
+
+  def train_loop_end(self):
+    """Returns the current training loss under the key "loss"."""
+    train_loss = self.train_loss.result()
+    return {
+        "loss": train_loss.numpy() if self.return_numpy else train_loss,
+    }
+
+  def build_eval_dataset(self):
+    """Returns a freshly distributed dataset built from `dataset_fn`."""
+    return self.strategy.experimental_distribute_datasets_from_function(
+        dataset_fn)
+
+  def eval_begin(self):
+    """Clears the evaluation loss metric."""
+    self.eval_loss.reset_states()
+
+  def eval_step(self, iterator):
+    """Runs one replicated evaluation step on the next element of `iterator`."""
+
+    def _replicated_step(inputs):
+      """Replicated evaluation step."""
+      inputs, targets = inputs
+      outputs = self.model(inputs)
+      loss = tf.reduce_mean(tf.keras.losses.MSE(targets, outputs))
+      self.eval_loss.update_state(loss)
+
+    self.strategy.run(_replicated_step, args=(next(iterator),))
+
+  def eval_end(self):
+    """Returns the current evaluation loss under the key "eval_loss"."""
+    eval_loss = self.eval_loss.result()
+    return {
+        "eval_loss": eval_loss.numpy() if self.return_numpy else eval_loss,
+    }
+
+
+class TestEvaluator(standard_runner.StandardEvaluator):
+  """Implements the evaluation API for the test model.
+
+  Unlike a metric-based evaluator, `eval_step` returns the per-step loss.
+  `eval_begin` supplies an empty list, `eval_reduce` appends each step output
+  to it, and `eval_end` averages what it receives (presumably
+  `StandardEvaluator` threads that list between the hooks -- confirm).
+  """
+
+  def __init__(self):
+    """Builds the model and the distributed evaluation dataset."""
+    self.strategy = tf.distribute.get_strategy()
+    self.model = create_model()
+    eval_dataset = self.strategy.experimental_distribute_datasets_from_function(
+        dataset_fn)
+    standard_runner.StandardEvaluator.__init__(self, eval_dataset)
+
+  def eval_reduce(self, state, output):
+    """Appends `output` to `state` in place and returns `state`."""
+    state.append(output)
+    return state
+
+  def eval_begin(self):
+    """Returns an empty list."""
+    return []
+
+  def eval_step(self, iterator):
+    """Returns the loss of one step, mean-reduced across replicas."""
+
+    def _replicated_step(inputs):
+      """Replicated evaluation step."""
+      inputs, targets = inputs
+      outputs = self.model(inputs)
+      loss = tf.reduce_mean(tf.keras.losses.MSE(targets, outputs))
+      return loss
+
+    per_replica_losses = self.strategy.run(
+        _replicated_step, args=(next(iterator),))
+    mean_loss = self.strategy.reduce(
+        tf.distribute.ReduceOp.MEAN, per_replica_losses, axis=None)
+    return mean_loss
+
+  def eval_end(self, outputs):
+    """Returns the mean of `outputs` under the key "eval_loss"."""
+    return {
+        "eval_loss": tf.reduce_mean(outputs),
+    }
+
+
+class TestEvaluatorWithNestedSummary(standard_runner.StandardEvaluator):
+  """Evaluator over two named datasets that reports a nested result dict.
+
+  Each dataset ("dataset" and "dataset2") has its own loss and accuracy
+  metric, and `eval_end` returns one sub-dictionary per dataset.
+  """
+
+  def __init__(self):
+    """Builds the model, two distributed datasets and two metric pairs."""
+    self.strategy = tf.distribute.get_strategy()
+    self.model = create_model()
+    dataset = self.strategy.experimental_distribute_datasets_from_function(
+        dataset_fn)
+    dataset2 = self.strategy.experimental_distribute_datasets_from_function(
+        dataset_fn)
+    # Both metric pairs deliberately share the names "loss" and "accuracy".
+    self.loss = tf.keras.metrics.Mean("loss", dtype=tf.float32)
+    self.accuracy = tf.keras.metrics.CategoricalAccuracy(
+        "accuracy", dtype=tf.float32)
+    self.loss2 = tf.keras.metrics.Mean("loss", dtype=tf.float32)
+    self.accuracy2 = tf.keras.metrics.CategoricalAccuracy(
+        "accuracy", dtype=tf.float32)
+    # The evaluation dataset is passed as a dict keyed by dataset name.
+    standard_runner.StandardEvaluator.__init__(
+        self, eval_dataset={
+            "dataset": dataset,
+            "dataset2": dataset2
+        })
+
+  def eval_step(self, iterator):
+    """Runs one step per dataset, updating that dataset's metrics.
+
+    Args:
+      iterator: Indexed by "dataset" and "dataset2"; each entry is advanced
+        with `next`.
+    """
+
+    def _replicated_step(loss, accuracy, inputs):
+      """Replicated evaluation step."""
+      inputs, targets = inputs
+      outputs = self.model(inputs)
+      loss.update_state(tf.keras.losses.MSE(targets, outputs))
+      accuracy.update_state(targets, outputs)
+
+    self.strategy.run(
+        lambda inputs: _replicated_step(self.loss, self.accuracy, inputs),
+        args=(next(iterator["dataset"]),))
+    self.strategy.run(
+        lambda inputs: _replicated_step(self.loss2, self.accuracy2, inputs),
+        args=(next(iterator["dataset2"]),))
+
+  def eval_end(self):
+    """Returns the loss and accuracy results grouped by dataset name."""
+    return {
+        "dataset": {
+            "loss": self.loss.result(),
+            "accuracy": self.accuracy.result()
+        },
+        "dataset2": {
+            "loss": self.loss2.result(),
+            "accuracy": self.accuracy2.result()
+        },
+    }
+
+
+class TestTrainerWithSummaries(standard_runner.StandardTrainer):
+  """A Trainer model with summaries for testing purposes.
+
+  The training step itself emits a scalar summary tagged "loss".
+  """
+
+  def __init__(self):
+    """Builds the model, optimizer, loss metric and training dataset."""
+    self.strategy = tf.distribute.get_strategy()
+    self.model = create_model()
+    self.optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)
+    # The optimizer's iteration counter is reused as the global step.
+    self.global_step = self.optimizer.iterations
+    self.train_loss = tf.keras.metrics.Mean("train_loss", dtype=tf.float32)
+    train_dataset = (
+        self.strategy.experimental_distribute_datasets_from_function(dataset_fn)
+    )
+    standard_runner.StandardTrainer.__init__(
+        self, train_dataset, use_tpu_summary_optimization=True)
+
+  def build_train_dataset(self):
+    """Returns a freshly distributed dataset built from `dataset_fn`."""
+    return self.strategy.experimental_distribute_datasets_from_function(
+        dataset_fn)
+
+  def train_step(self, iterator):
+    """Runs one replicated training step on the next element of `iterator`."""
+
+    def _replicated_step(inputs):
+      """Replicated training step."""
+      inputs, targets = inputs
+      with tf.GradientTape() as tape:
+        outputs = self.model(inputs)
+        loss = tf.reduce_mean(tf.keras.losses.MSE(targets, outputs))
+      # Summary written from inside the step function.
+      tf.summary.scalar("loss", loss)
+      grads = tape.gradient(loss, self.model.variables)
+      self.optimizer.apply_gradients(zip(grads, self.model.variables))
+      self.train_loss.update_state(loss)
+
+    self.strategy.run(_replicated_step, args=(next(iterator),))
+
+
+class ControllerTest(tf.test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    """Gives every test a temporary directory as its `model_dir`."""
+    super().setUp()
+    self.model_dir = self.get_temp_dir()
+
+  def test_no_checkpoint(self):
+    """Trains and evaluates without a checkpoint manager, twice in a row."""
+    test_runner = TestRunner()
+    # No checkpoint manager and no strategy.
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        summary_dir=os.path.join(self.model_dir, "summaries/train"),
+        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+    self.assertEqual(test_runner.global_step, 10)
+    # Loss values should be written into both the train and eval summaries.
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "eval_loss", os.path.join(self.model_dir, "summaries/eval")))
+    # No checkpoint, so global step starts from 0.
+    test_runner.global_step.assign(0)
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+    self.assertEqual(test_runner.global_step, 10)
+
+  def test_no_checkpoint_and_summaries(self):
+    """Trains and evaluates with neither checkpointing nor summary dirs."""
+    test_runner = TestRunner()
+    # No checkpoint + summary directories.
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2)
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+    # Training must still reach the requested step count.
+    self.assertEqual(test_runner.global_step, 10)
+
+  @parameterized.named_parameters(("return_numpy", True),
+                                  ("return_tensor", False))
+  def test_train_and_evaluate(self, return_numpy):
+    """Checks checkpoints and summaries after interleaved train/eval.
+
+    Args:
+      return_numpy: Forwarded to `TestRunner`; selects whether the runner
+        reports losses as numpy values or as tensors.
+    """
+    test_runner = TestRunner(return_numpy=return_numpy)
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step,
+        checkpoint_interval=10)
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        summary_dir=os.path.join(self.model_dir, "summaries/train"),
+        checkpoint_manager=checkpoint_manager,
+        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+
+    # Checkpoints are saved.
+    self.assertNotEmpty(tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*")))
+
+    # Loss values should be written into both the train and eval summaries.
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "eval_loss", os.path.join(self.model_dir, "summaries/eval")))
+
+  def test_train_only(self):
+    """Trains without an evaluator; no eval summary directory may appear."""
+    test_runner = TestRunner()
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step,
+        checkpoint_interval=10)
+    # An eval summary dir is configured, but no evaluator is passed.
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        summary_dir=os.path.join(self.model_dir, "summaries/train"),
+        checkpoint_manager=checkpoint_manager,
+        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"),
+    )
+    test_controller.train(steps=10)
+
+    # Checkpoints are saved.
+    self.assertNotEmpty(tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*")))
+
+    # Only train summaries are written.
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "summaries/train")))
+    self.assertFalse(
+        tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/eval")))
+
+  def test_evaluate_only(self):
+    """Evaluates once, then continuously, without any trainer."""
+    test_runner = TestRunner()
+
+    # Write one checkpoint up front so there is something to evaluate.
+    checkpoint = tf.train.Checkpoint(model=test_runner.model)
+    checkpoint.save(os.path.join(self.model_dir, "ckpt"))
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step)
+    # A train summary dir is configured, but no trainer is passed.
+    test_controller = controller.Controller(
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        checkpoint_manager=checkpoint_manager,
+        summary_dir=os.path.join(self.model_dir, "summaries/train"),
+        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
+    eval_results = test_controller.evaluate(steps=2)
+
+    # Only eval summaries are written
+    self.assertFalse(
+        tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "eval_loss", os.path.join(self.model_dir, "summaries/eval")))
+    self.assertIn("eval_loss", eval_results)
+
+    # Tests continuous eval with timeout and timeout_fn.
+    done_file = os.path.join(self.model_dir, "summaries/eval/Done")
+
+    def timeout_fn():
+      # Leaves a marker file proving the callback ran, then returns True.
+      with tf.io.gfile.GFile(done_file, "w") as f:
+        f.write("DONE")
+        return True
+
+    test_controller = controller.Controller(
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        checkpoint_manager=checkpoint_manager,
+        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
+    test_controller.evaluate_continuously(
+        timeout=1, timeout_fn=timeout_fn, steps=2)
+    # The marker file exists only if `timeout_fn` was invoked.
+    self.assertNotEmpty(tf.io.gfile.glob(done_file))
+
+  def test_no_eval_steps(self):
+    """Calls `evaluate()` without `steps`; it is only required not to raise."""
+    test_runner = TestRunner()
+
+    checkpoint = tf.train.Checkpoint(model=test_runner.model)
+    checkpoint.save(os.path.join(self.model_dir, "ckpt"))
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step)
+    test_controller = controller.Controller(
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        checkpoint_manager=checkpoint_manager)
+    test_controller.evaluate()
+
+  def test_already_trained_model(self):
+    """Calls `train` when the global step already equals the target.
+
+    The test makes no assertions; the call is only required not to raise.
+    """
+    test_runner = TestRunner()
+    test_runner.global_step.assign(10)
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step,
+        checkpoint_interval=10)
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        checkpoint_manager=checkpoint_manager)
+    # `global_step` is already `train_steps`.
+    test_controller.train(steps=10)
+
+  def test_summaries_inside_train_fn(self):
+    """Checks that summaries emitted inside the train step reach disk."""
+    test_runner = TestTrainerWithSummaries()
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    # No `checkpoint_interval` is given to this manager.
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step)
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        summary_dir=os.path.join(self.model_dir, "summaries/train"),
+        summary_interval=2,
+        checkpoint_manager=checkpoint_manager,
+    )
+    test_controller.train(steps=10)
+
+    # No checkpoints are expected (presumably because the manager above has no
+    # `checkpoint_interval` -- confirm against the Controller).
+    self.assertEmpty(tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*")))
+
+    # Only train summaries are written.
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "summaries/train")))
+    self.assertFalse(
+        tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/eval")))
+
+  def test_train_and_evaluate_with_same_summary_dir(self):
+    """Uses a single directory for both train and eval summaries."""
+    test_runner = TestRunner()
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step)
+    # `summary_dir` and `eval_summary_dir` point at the same path.
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        summary_dir=os.path.join(self.model_dir, "summaries"),
+        checkpoint_manager=checkpoint_manager,
+        eval_summary_dir=os.path.join(self.model_dir, "summaries"))
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+
+    # Both train and eval loss values should be found in the shared directory.
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "summaries")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "eval_loss", os.path.join(self.model_dir, "summaries")))
+
+  def test_early_stop_on_eval_loss(self):
+    """Checks that a Controller subclass can stop training early.
+
+    The subclass overrides `train_and_evaluate` to return as soon as the
+    runner's eval loss drops below 0.1; the test then expects the global step
+    to be below the requested 10.
+    """
+    test_runner = TestRunner()
+
+    class EarlyStopController(controller.Controller):
+      """A subclass of Controller supports early stopping."""
+
+      def train_and_evaluate(self,
+                             train_steps: int = None,
+                             eval_steps: int = None,
+                             eval_interval: int = None):
+        # Alternates `eval_interval` training steps with an evaluation until
+        # `train_steps` is reached or the early-stop condition fires.
+        while self.global_step.numpy() < train_steps:
+          interval = min(train_steps - self.global_step.numpy(), eval_interval)
+          num_steps = self.global_step.numpy() + interval
+          self.train(steps=num_steps, checkpoint_at_completion=False)
+          self.evaluate(steps=eval_steps)
+          # Early stop condition.
+          if test_runner.eval_loss.result() < 0.1:
+            logging.info(
+                "Training early stopped as eval_loss %s is less than 0.1",
+                test_runner.eval_loss.result())
+            return
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step,
+        checkpoint_interval=10)
+    test_controller = EarlyStopController(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2,
+        checkpoint_manager=checkpoint_manager)
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=6, eval_interval=2)
+
+    # Training must have stopped before the requested 10 steps.
+    self.assertLess(test_runner.global_step, 10)
+
+  def test_evaluate_with_loss_outputs(self):
+    test_evaluator = TestEvaluator()
+
+    checkpoint = tf.train.Checkpoint(model=test_evaluator.model)
+    checkpoint.save(os.path.join(self.model_dir, "ckpt"))
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint, self.model_dir, max_to_keep=None)
+    test_controller = controller.Controller(
+        evaluator=test_evaluator,
+        global_step=tf.Variable(0, dtype=tf.int64),
+        checkpoint_manager=checkpoint_manager,
+        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
+    test_controller.evaluate(steps=5)
+
+    # Only eval summaries are written
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "eval_loss", os.path.join(self.model_dir, "summaries/eval")))
+
+  def test_train_and_evaluate_reset_datasets(self):
+    test_runner = TestRunner()
+
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=2)
+
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+
+    train_dataset = (
+        test_runner.strategy.experimental_distribute_datasets_from_function(
+            dataset_fn))
+    eval_dataset = (
+        test_runner.strategy.experimental_distribute_datasets_from_function(
+            dataset_fn))
+    test_runner.train_dataset = train_dataset
+    test_runner.eval_dataset = eval_dataset
+
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=6)
+
+  def test_eval_and_checkpoint_interval(self):
+    test_runner = TestRunner()
+
+    checkpoint = tf.train.Checkpoint(
+        model=test_runner.model, optimizer=test_runner.optimizer)
+    checkpoint_manager = tf.train.CheckpointManager(
+        checkpoint,
+        self.model_dir,
+        max_to_keep=None,
+        step_counter=test_runner.global_step,
+        checkpoint_interval=5)
+    test_controller = controller.Controller(
+        trainer=test_runner,
+        evaluator=test_runner,
+        global_step=test_runner.global_step,
+        steps_per_loop=10,
+        checkpoint_manager=checkpoint_manager)
+    test_controller.train_and_evaluate(
+        train_steps=10, eval_steps=2, eval_interval=5)
+
+    # Expect 3 checkpoints to be saved at step: 0, 5, 10.
+    self.assertLen(
+        tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt-*.data*")), 3)
+    # Expect evaluation to be performed 2 times, at steps 5 and 10.
+    self.assertLen(
+        summaries_with_matching_keyword("eval_loss", self.model_dir), 2)
+
+  def test_evaluate_with_nested_summaries(self):
+    test_evaluator = TestEvaluatorWithNestedSummary()
+    test_controller = controller.Controller(
+        evaluator=test_evaluator,
+        global_step=tf.Variable(0, dtype=tf.int64),
+        eval_summary_dir=self.model_dir)
+    test_controller.evaluate(steps=5)
+
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "dataset")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "dataset")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "accuracy", os.path.join(self.model_dir, "dataset")))
+
+    self.assertNotEmpty(
+        tf.io.gfile.listdir(os.path.join(self.model_dir, "dataset2")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "loss", os.path.join(self.model_dir, "dataset2")))
+    self.assertNotEmpty(
+        summaries_with_matching_keyword(
+            "accuracy", os.path.join(self.model_dir, "dataset2")))
+
+if __name__ == "__main__":
+  tf.test.main()
--- a/official/staging/training/runnable.py
+++ b/official/staging/training/runnable.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,19 +14,12 @@
 # ==============================================================================
 """An abstraction that users can easily handle their custom training loops."""

-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
-
 import abc
-import six
-import tensorflow.compat.v2 as tf
 from typing import Dict, Optional, Text
+import tensorflow as tf


-@six.add_metaclass(abc.ABCMeta)
-class AbstractTrainable(tf.Module):
+class AbstractTrainer(tf.Module, metaclass=abc.ABCMeta):
  """An abstract class defining the APIs required for training."""

  @abc.abstractmethod
@@ -50,14 +43,14 @@ class AbstractTrainable(tf.Module):
        one update to model parameters, e.g. if training a GAN).

    Returns:
-      The function may return a dictionary of `Tensors`, which will be
-      written to logs and as TensorBoard summaries.
+      The function may return a dictionary of `Tensors` or numpy arrays, which
+      will be written to logs and as TensorBoard summaries. It can also be a
+      nested dictionary, yielding a hierarchy of summary directories.
    """
    pass


-@six.add_metaclass(abc.ABCMeta)
-class AbstractEvaluable(tf.Module):
+class AbstractEvaluator(tf.Module, metaclass=abc.ABCMeta):
  """An abstract class defining the APIs required for evaluation."""

  @abc.abstractmethod
@@ -73,7 +66,8 @@ class AbstractEvaluable(tf.Module):
        is `None`.

    Returns:
-      The function may return a dictionary of `Tensors`, which will be
-      written to logs and as TensorBoard summaries.
+      The function may return a dictionary of `Tensors` or numpy arrays, which
+      will be written to logs and as TensorBoard summaries. It can also be a
+      nested dictionary, yielding a hierarchy of summary directories.
    """
    pass
--- a/official/staging/training/standard_runnable.py
+++ b/official/staging/training/standard_runnable.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,67 +14,101 @@
 # ==============================================================================
 """An abstraction that users can easily handle their custom training loops."""

-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
-
 import abc
-import six
-import tensorflow.compat.v2 as tf
-from typing import Dict, Optional, Text
+from typing import Any, Dict, Optional, Text
+import dataclasses
+from orbit import runner
+from orbit import utils
+import tensorflow as tf
+
+
+@dataclasses.dataclass(frozen=True)
+class TrainerOverrides:
+  """Advanced overrides for Orbit trainers.
+
+  Attributes:
+    use_tf_while_loop: A boolean indicating whether to wrap the train step
+      with a `tf.while_loop`.
+    use_tf_function: A boolean indicating whether a `tf.function` will be used.
+      If False, training will run in pure eager mode.
+    use_tpu_summary_optimization: A boolean indicating whether to enable the
+      performance optimization for summaries on TPUs. On TPUs, writing
+      summaries with outside compilation inside the train step is slow. If
+      True, two `tf.function`s with two XLA programs are created: one with
+      summaries and one without, and the program with summaries (the slow
+      one) is run only when necessary.
+  """
+  use_tf_while_loop: bool = True
+  use_tf_function: bool = True
+  use_tpu_summary_optimization: bool = False

-from official.staging.training import runnable
-from official.staging.training import utils

+class StandardTrainer(runner.AbstractTrainer, metaclass=abc.ABCMeta):
+  """Implements the standard functionality of AbstractTrainer APIs."""

-@six.add_metaclass(abc.ABCMeta)
-class StandardTrainable(runnable.AbstractTrainable):
-  """Implements the standard functionality of AbstractTrainable APIs."""
+  def __init__(self,
+               train_dataset,
+               use_tf_while_loop=True,
+               use_tf_function=True,
+               use_tpu_summary_optimization=False):
+    """Construct a `StandardTrainer` object.

-  def __init__(self, use_tf_while_loop=True, use_tf_function=True):
+    Args:
+      train_dataset: A tf.nest-compatible structure of tf.data.Dataset or
+        DistributedDataset.
+      use_tf_while_loop: A boolean indicating whether to wrap the train step
+        with a `tf.while_loop`.
+      use_tf_function: A boolean indicating whether a `tf.function` will be
+        used. If False, training will run in pure eager mode.
+      use_tpu_summary_optimization: A boolean indicating whether to enable the
+        performance optimization for summaries on TPUs. On TPUs, writing
+        summaries with outside compilation inside the train step is slow. If
+        True, two `tf.function`s with two XLA programs are created: one with
+        summaries and one without, and the program with summaries (the slow
+        one) is run only when necessary.
+    """
    if use_tf_while_loop and not use_tf_function:
      raise ValueError("`use_tf_while_loop=True` and `use_tf_function=False` "
                       "is not supported")
-    self.use_tf_while_loop = use_tf_while_loop
-    self.use_tf_function = use_tf_function
-    self.train_dataset = None
-    self.train_iter = None
-    self.train_loop_fn = None
-
-  @abc.abstractmethod
-  def build_train_dataset(self):
-    """Builds the training datasets.
-
-    Returns:
-      A tf.nest-compatible structure of tf.data.Dataset or DistributedDataset.
-    """
-    pass
+    if use_tpu_summary_optimization and not use_tf_while_loop:
+      raise ValueError("`use_tpu_summary_optimization=True` and "
+                       "`use_tf_while_loop=False` is not supported")
+    self._use_tf_while_loop = use_tf_while_loop
+    self._use_tf_function = use_tf_function
+    self._train_dataset = train_dataset
+    self._train_iter = None
+    self._train_loop_fn = None
+    self._use_tpu_summary_optimization = use_tpu_summary_optimization

  def train(self,
            num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]:
    """See base class."""
-    if self.train_dataset is None:
-      # Build train input dataset
-      self.train_dataset = self.build_train_dataset()
-      self.train_iter = tf.nest.map_structure(iter, self.train_dataset)
+    self.train_loop_begin()
+
+    if self._train_iter is None:
+      self._train_iter = tf.nest.map_structure(iter, self.train_dataset)

-    if self.train_loop_fn is None:
+    if self._train_loop_fn is None:
      train_fn = self.train_step
-      if self.use_tf_while_loop:
-        self.train_loop_fn = utils.create_tf_while_loop_fn(train_fn)
+      if self._use_tf_while_loop:
+        self._train_loop_fn = utils.create_tf_while_loop_fn(train_fn)
+        if self._use_tpu_summary_optimization:
+          self._train_loop_fn = utils.train_function_with_summaries(
+              self._train_loop_fn)
+        else:
+          self._train_loop_fn = tf.function(self._train_loop_fn)
      else:
-        if self.use_tf_function:
+        if self._use_tf_function:
          train_fn = tf.function(train_fn)
-        self.train_loop_fn = utils.create_loop_fn(train_fn)
+        self._train_loop_fn = utils.create_loop_fn(train_fn)

-    self.train_loop_begin()
-    self.train_loop_fn(self.train_iter, num_steps)
+    self._train_loop_fn(self._train_iter, num_steps)
    return self.train_loop_end()

  def train_loop_begin(self):
    """Called once at the beginning of the training loop.

+    This method is called before the dataset iterators are created.
    This is a good place to reset metrics that accumulate values over multiple
    steps of training.
    """
@@ -89,6 +123,12 @@ class StandardTrainable(runnable.AbstractTrainable):
    context" for generality, to allow e.g. multiple iterator dequeues and calls
    to `strategy.run`.

+    Note that if `use_tf_function=True`, all the code inside `train_step` should
+    be `tf.function`-compatible, as it will be traced with `tf.function`. This
+    means you cannot put arbitrary Python code in this function. Any NumPy
+    operations should be placed in the `train_loop_begin` or `train_loop_end`
+    functions instead.
+
    Args:
      iterator: A tf.nest-compatible structure of tf.data Iterator or
        DistributedIterator.
@@ -103,58 +143,90 @@ class StandardTrainable(runnable.AbstractTrainable):

    Returns:
      The function may return a dictionary of `Tensors`, which will be
-      written to logs and as TensorBoard summaries.
+      written to logs and as TensorBoard summaries. It can also be a
+      nested dictionary, yielding a hierarchy of summary directories.
    """
    pass

+  @property
+  def train_dataset(self):
+    """Returns the train_dataset instance."""
+    return self._train_dataset

-@six.add_metaclass(abc.ABCMeta)
-class StandardEvaluable(runnable.AbstractEvaluable):
-  """Implements the standard functionality of AbstractEvaluable APIs."""
+  @train_dataset.setter
+  def train_dataset(self, train_dataset):
+    """Sets a new train dataset, replacing the existing one.

-  def __init__(self, use_tf_function=True):
-    self.eval_use_tf_function = use_tf_function
-    self.eval_dataset = None
-    self.eval_loop_fn = None
+    Any unfinished work in the previous dataset will be discarded.

-  @abc.abstractmethod
-  def build_eval_dataset(self):
-    """Builds the evaluation datasets.
+    Args:
+      train_dataset: A tf.nest-compatible structure of tf.data.Dataset or
+        DistributedDataset.
+    """
+    self._train_dataset = train_dataset
+    self._train_iter = None

-    Returns:
-      A tf.nest-compatible structure of tf.data.Dataset or DistributedDataset.
+
+@dataclasses.dataclass(frozen=True)
+class EvaluatorOverrides:
+  """Advanced overrides for Orbit evaluators.
+
+  Attributes:
+    use_tf_function: A boolean indicating whether a `tf.function` will be used.
+      If False, evaluation will run in pure eager mode.
  """
-    pass
+  use_tf_function: bool = True
+
+
+class StandardEvaluator(runner.AbstractEvaluator, metaclass=abc.ABCMeta):
+  """Implements the standard functionality of AbstractEvaluator APIs."""
+
+  def __init__(self, eval_dataset, use_tf_function=True):
+    """Construct a `StandardEvaluator` object.
+
+    Args:
+      eval_dataset: A tf.nest-compatible structure of tf.data.Dataset or
+        DistributedDataset.
+      use_tf_function: A boolean indicating whether a `tf.function` will be
+        used. If False, evaluation will run in pure eager mode.
+    """
+    self._eval_use_tf_function = use_tf_function
+    self._eval_dataset = eval_dataset
+    self._eval_loop_fn = None

  def evaluate(
      self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]:
    """See base class."""
-    if self.eval_dataset is None:
-      # Build train input dataset
-      self.eval_dataset = self.build_eval_dataset()
+    outputs = self.eval_begin()  # pylint: disable=assignment-from-no-return

-    if self.eval_loop_fn is None:
+    eval_iter = tf.nest.map_structure(iter, self._eval_dataset)
+    if self._eval_loop_fn is None:
      eval_fn = self.eval_step
-      if self.eval_use_tf_function:
+      if self._eval_use_tf_function:
        eval_fn = tf.function(eval_fn)
-      self.eval_loop_fn = utils.create_loop_fn(eval_fn)
-
-    eval_iter = tf.nest.map_structure(iter, self.eval_dataset)
+      self._eval_loop_fn = utils.create_loop_fn(eval_fn)

-    self.eval_begin()
-    self.eval_loop_fn(eval_iter, num_steps)
+    outputs = self._eval_loop_fn(
+        eval_iter, num_steps, state=outputs, reduce_fn=self.eval_reduce)
+    if outputs is None:
      return self.eval_end()
+    else:
+      return self.eval_end(outputs)

-  def eval_begin(self):
+  def eval_begin(self) -> Any:
    """Called once at the beginning of the evaluation.

+    This method is called before the dataset iterators are created.
    This is a good place to reset metrics that accumulate values over the entire
    evaluation.
+
+    Returns:
+      An output which is passed as `state` argument into `eval_reduce` function.
    """
    pass

  @abc.abstractmethod
-  def eval_step(self, iterator):
+  def eval_step(self, iterator) -> Any:
    """Implements one step of evaluation.

    What a "step" consists of is up to the implementer. If using distribution
@@ -162,20 +234,67 @@ class StandardEvaluable(runnable.AbstractEvaluable):
    context" for generality, to allow e.g. multiple iterator dequeues and calls
    to `strategy.run`.

+    Note that if `use_tf_function=True`, all the code inside `eval_step` should
+    be `tf.function`-compatible, as it will be traced with `tf.function`. This
+    means you cannot put arbitrary Python code in this function. Any NumPy
+    operations should be placed in the `eval_begin`, `eval_end` or
+    `eval_reduce` functions instead.
+
    Args:
      iterator: A tf.nest-compatible structure of tf.data Iterator or
        DistributedIterator.
+
+    Returns:
+      An output which is passed as `step_outputs` argument into `eval_reduce`
+      function.
    """
    pass

-  def eval_end(self) -> Optional[Dict[Text, tf.Tensor]]:
+  def eval_end(self, *args) -> Optional[Dict[Text, tf.Tensor]]:
    """Called at the end of the evaluation.

    This is a good place to get metric results. The value returned from this
    function will be returned as-is from the evaluate() method.

+    Args:
+      *args: The outputs from `eval_reduce` for the last eval step.
+
    Returns:
      The function may return a dictionary of `Tensors`, which will be
-      written to logs and as TensorBoard summaries.
+      written to logs and as TensorBoard summaries. It can also be a
+      nested dictionary, yielding a hierarchy of summary directories.
    """
    pass
+
+  def eval_reduce(self, state=None, step_outputs=None) -> Any:
+    """A function to do the reduction on the evaluation outputs per step.
+
+    This is useful for passing states throughout evaluation. E.g. it can be used
+    to maintain the output losses from all the evaluation steps, and compute the
+    mean loss in `eval_end` function.
+
+    Args:
+      state: A maintained state throughout the evaluation.
+      step_outputs: Outputs from the current evaluation step.
+
+    Returns:
+      An output which is passed as `state` argument into `eval_reduce` function
+      for the next step. After evaluation is finished, the output from last step
+      will be passed into `eval_end` function.
+    """
+    pass
+
+  @property
+  def eval_dataset(self):
+    """Returns the eval_dataset instance."""
+    return self._eval_dataset
+
+  @eval_dataset.setter
+  def eval_dataset(self, eval_dataset):
+    """Sets a new eval dataset, replacing the existing one.
+
+    Args:
+      eval_dataset: A tf.nest-compatible structure of tf.data.Dataset or
+        DistributedDataset.
+    """
+    self._eval_dataset = eval_dataset
--- a/orbit/standard_runner_test.py
+++ b/orbit/standard_runner_test.py
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for orbit.standard_runner."""
+
+from orbit import standard_runner
+
+import tensorflow as tf
+
+
+def dataset_fn(input_context=None):
+  del input_context
+
+  def dummy_data(_):
+    return tf.zeros((1, 1), dtype=tf.float32)
+
+  dataset = tf.data.Dataset.range(1)
+  dataset = dataset.repeat()
+  dataset = dataset.map(
+      dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+  return dataset
+
+
+class TestRunner(standard_runner.StandardTrainer,
+                 standard_runner.StandardEvaluator):
+  """Implements the training and evaluation APIs for tests."""
+
+  def __init__(self):
+    self.strategy = tf.distribute.get_strategy()
+    self.global_step = tf.Variable(
+        0,
+        trainable=False,
+        dtype=tf.int64,
+        name='global_step',
+        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)
+    standard_runner.StandardTrainer.__init__(self, train_dataset=None)
+    standard_runner.StandardEvaluator.__init__(self, eval_dataset=None)
+
+  def train_loop_begin(self):
+    self.train_dataset = (
+        self.strategy.experimental_distribute_datasets_from_function(dataset_fn)
+    )
+
+  def train_step(self, iterator):
+
+    def _replicated_step(_):
+      self.global_step.assign_add(1)
+
+    self.strategy.run(_replicated_step, args=(next(iterator),))
+
+  def train_loop_end(self):
+    return self.global_step.numpy()
+
+  def eval_begin(self):
+    self.eval_dataset = self.strategy.experimental_distribute_datasets_from_function(
+        dataset_fn)
+
+  def eval_step(self, iterator):
+
+    def _replicated_step(_):
+      self.global_step.assign_add(1)
+
+    self.strategy.run(_replicated_step, args=(next(iterator),))
+
+  def eval_end(self):
+    return self.global_step.numpy()
+
+
+class StandardRunnerTest(tf.test.TestCase):
+
+  def test_train(self):
+    test_runner = TestRunner()
+    self.assertEqual(
+        test_runner.train(tf.convert_to_tensor(10, dtype=tf.int32)), 10)
+
+  def test_eval(self):
+    test_runner = TestRunner()
+    self.assertEqual(
+        test_runner.evaluate(tf.convert_to_tensor(10, dtype=tf.int32)), 10)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/official/staging/training/utils.py
+++ b/official/staging/training/utils.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,16 +14,14 @@
 # ==============================================================================
 """Some layered modules/functions to help users writing custom training loop."""

-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
-
 import abc
+import contextlib
+import functools
 import inspect
-import six

-import tensorflow.compat.v2 as tf
+import os
+import numpy as np
+import tensorflow as tf


 def create_loop_fn(step_fn):
@@ -79,7 +77,6 @@ def create_tf_while_loop_fn(step_fn):
    A callable defined as the `loop_fn` defination below.
  """

-  @tf.function
  def loop_fn(iterator, num_steps):
    """A loop function with multiple steps.

@@ -98,6 +95,30 @@ def create_tf_while_loop_fn(step_fn):
  return loop_fn


+def create_global_step() -> tf.Variable:
+  """Creates a `tf.Variable` suitable for use as a global step counter.
+
+  Creating and managing a global step variable may be necessary for
+  `AbstractTrainer` subclasses that perform multiple parameter updates per
+  `Controller` "step", or use different optimizers on different steps.
+
+  In these cases, an `optimizer.iterations` property generally can't be used
+  directly, since it would correspond to parameter updates instead of iterations
+  in the `Controller`'s training loop. Such use cases should simply call
+  `step.assign_add(1)` at the end of each step.
+
+  Returns:
+    A non-trainable scalar `tf.Variable` of dtype `tf.int64`, with only the
+    first replica's value retained when synchronizing across replicas in
+    a distributed setting.
+  """
+  return tf.Variable(
+      0,
+      dtype=tf.int64,
+      trainable=False,
+      aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)
+
+
 def make_distributed_dataset(strategy, dataset_or_fn, *args, **kwargs):
  """A helper function to create distributed dataset.

@@ -130,10 +151,7 @@ def make_distributed_dataset(strategy, dataset_or_fn, *args, **kwargs):
    # names, pass `ctx` as the value of `input_context` when calling
    # `dataset_or_fn`. Otherwise `ctx` will not be used when calling
    # `dataset_or_fn`.
-    if six.PY3:
    argspec = inspect.getfullargspec(dataset_or_fn)
-    else:
-      argspec = inspect.getargspec(dataset_or_fn)
    args_names = argspec.args

    if "input_context" in args_names:
@@ -144,96 +162,99 @@ def make_distributed_dataset(strategy, dataset_or_fn, *args, **kwargs):
  return strategy.experimental_distribute_datasets_from_function(dataset_fn)


-class SummaryManager(object):
+class SummaryManager:
  """A class manages writing summaries."""

-  def __init__(self,
-               summary_writer,
-               summary_fn,
-               global_step=None,
-               summary_interval=None):
+  def __init__(self, summary_dir, summary_fn, global_step=None):
    """Construct a summary manager object.

    Args:
-      summary_writer: A `tf.summary.SummaryWriter` instance for writing
-        summaries.
+      summary_dir: The directory to write summaries to.
      summary_fn: A callable defined as `def summary_fn(name, tensor,
        step=None)`, which describes the summary operation.
-      global_step: A `tf.Variable` instance for checking the current global step
-        value, in case users want to save summaries every N steps.
-      summary_interval: An integer, indicates the minimum step interval between
-        two summaries.
+      global_step: A `tf.Variable` instance for the global step.
    """
-    if summary_writer is not None:
-      self._summary_writer = summary_writer
-      self._enabled = True
-    else:
-      self._summary_writer = tf.summary.create_noop_writer()
-      self._enabled = False
+    self._enabled = (summary_dir is not None)
+    self._summary_dir = summary_dir
    self._summary_fn = summary_fn
+    self._summary_writers = {}

    if global_step is None:
      self._global_step = tf.summary.experimental.get_step()
    else:
      self._global_step = global_step

-    if summary_interval is not None:
-      if self._global_step is None:
-        raise ValueError("`summary_interval` is not None, but no `global_step` "
-                         "can be obtained ")
-      self._last_summary_step = self._global_step.numpy()
-    self._summary_interval = summary_interval
+  def summary_writer(self, relative_path=""):
+    """Returns the underlying summary writer.

-  @property
-  def summary_interval(self):
-    return self._summary_interval
-
-  @property
-  def summary_writer(self):
-    """Returns the underlying summary writer."""
-    return self._summary_writer
+    Args:
+      relative_path: The current path in which to write summaries, relative to
+        the summary directory. By default it is empty, which specifies the root
+        directory.
+    """
+    if self._summary_writers and relative_path in self._summary_writers:
+      return self._summary_writers[relative_path]
+    if self._enabled:
+      self._summary_writers[relative_path] = tf.summary.create_file_writer(
+          os.path.join(self._summary_dir, relative_path))
+    else:
+      self._summary_writers[relative_path] = tf.summary.create_noop_writer()
+    return self._summary_writers[relative_path]

  def flush(self):
-    """Flush the underlying summary writer."""
+    """Flush the underlying summary writers."""
    if self._enabled:
-      tf.summary.flush(self._summary_writer)
-
-  def write_summaries(self, items, always_write=True):
-    """Write a bulk of summaries.
+      tf.nest.map_structure(tf.summary.flush, self._summary_writers)
+
+  def write_summaries(self, summary_dict):
+    """Write summaries for the given values.
+
+    This recursively creates subdirectories for any nested dictionaries
+    provided in `summary_dict`, yielding a hierarchy of directories which will
+    then be reflected in the TensorBoard UI as different colored curves.
+
+    E.g. users may evaluate on multiple datasets and return `summary_dict` as a
+    nested dictionary.
+
+    ```
+    {
+        "dataset": {
+            "loss": loss,
+            "accuracy": accuracy
+        },
+        "dataset2": {
+            "loss": loss2,
+            "accuracy": accuracy2
+        },
+    }
+    ```
+
+    This will create two subdirectories "dataset" and "dataset2" inside the
+    summary root directory. Each directory will contain event files including
+    both "loss" and "accuracy" summaries.

    Args:
-      items: a dictionary of `Tensors` for writing summaries.
-      always_write: An optional boolean. If `True`, the manager will always
-        write summaries unless the summaries have been written for the same
-        step. Otherwise the manager will only write the summaries if the
-        interval between summaries are larger than `summary_interval`.
-
-    Returns:
-      A boolean indicates whether the summaries are written or not.
+      summary_dict: A dictionary of values. If any value in `summary_dict` is
+        itself a dictionary, then the function will recursively create
+        subdirectories with names given by the keys in the dictionary. The
+        Tensor values are summarized using the summary writer instance specific
+        to the parent relative path.
    """
-    # TODO(rxsang): Support writing summaries with nested structure, so users
-    # can split the summaries into different directories for nicer visualization
-    # in Tensorboard, like train and eval metrics.
    if not self._enabled:
-      return False
-
-    if self._summary_interval is not None:
-      current_step = self._global_step.numpy()
-      if current_step == self._last_summary_step:
-        return False
-      if not always_write and current_step < (self._last_summary_step +
-                                              self._summary_interval):
-        return False
-      self._last_summary_step = current_step
-
-    with self._summary_writer.as_default():
-      for name, tensor in items.items():
-        self._summary_fn(name, tensor, step=self._global_step)
-    return True
+      return
+    self._write_summaries(summary_dict)
+
+  def _write_summaries(self, summary_dict, relative_path=""):
+    for name, value in summary_dict.items():
+      if isinstance(value, dict):
+        self._write_summaries(
+            value, relative_path=os.path.join(relative_path, name))
+      else:
+        with self.summary_writer(relative_path).as_default():
+          self._summary_fn(name, value, step=self._global_step)


-@six.add_metaclass(abc.ABCMeta)
-class Trigger(object):
+class Trigger(metaclass=abc.ABCMeta):
  """An abstract class representing a "trigger" for some event."""

  @abc.abstractmethod
@@ -294,7 +315,7 @@ class IntervalTrigger(Trigger):
    self._last_trigger_value = 0


-class EpochHelper(object):
+class EpochHelper:
  """A Helper class to handle epochs in Customized Training Loop."""

  def __init__(self, epoch_steps, global_step):
@@ -340,3 +361,86 @@ class EpochHelper(object):
  @property
  def current_epoch(self):
    return self._current_epoch
+
+
+@contextlib.contextmanager
+def _soft_device_placement():
+  """Context manager for soft device placement, allowing summaries on CPU."""
+  original_setting = tf.config.get_soft_device_placement()
+  try:
+    tf.config.set_soft_device_placement(True)
+    yield
+  finally:  # Restore the saved setting even if the managed body raises.
+    tf.config.set_soft_device_placement(original_setting)
+
+
+def train_function_with_summaries(*args, **kwargs):
+  """Utility function to support TPU summaries via multiple `tf.function`s.
+
+  This permits interleaving summaries inside TPU-compatible code, but without
+  any performance impact on steps that do not write summaries.
+
+  Usage is as a decorator, similar to `tf.function`, and any `tf.function`
+  arguments will be passed through if supplied:
+
+      @trainer.train_function_with_summaries
+      def train(self, num_steps):
+        ...
+
+  The decorated function is assumed to be a loop method accepting a `num_steps`
+  parameter, as for instance would be called within the `Controller`'s outer
+  train loop. The implementation here assumes that `summary_frequency` is
+  divisible by `steps_per_loop`. The decorated method should accept two
+  arguments, `self` and `num_steps`.
+
+  Two `tf.function` versions of `train_fn` are created: one run for a single
+  step with soft device placement enabled (used when summaries should be
+  recorded), and one run under `tf.summary.record_if(False)` for the remaining
+  steps, with the soft device placement setting left unchanged.
+
+  Args:
+    *args: Arguments to pass through to `tf.function`.
+    **kwargs: Keyword arguments to pass through to `tf.function`.
+
+  Returns:
+    If the first argument is a callable, returns the decorated callable.
+    Otherwise, returns a decorator.
+  """
+
+  def decorator(train_fn):
+    # TODO(dhr): Validate the signature of train_fn?
+
+    train_fn_with_summaries = tf.function(train_fn, *args, **kwargs)
+    train_fn_without_summaries = tf.function(train_fn, *args, **kwargs)
+
+    @functools.wraps(train_fn)
+    def wrapper(self, num_steps):
+      if tf.summary.should_record_summaries():  # One step with summaries.
+        with _soft_device_placement():
+          output = train_fn_with_summaries(self, tf.constant(1))
+          num_steps -= 1  # That step counts toward the requested total.
+      if num_steps >= 1:  # Remaining steps run with recording turned off.
+        with tf.summary.record_if(False):
+          output = train_fn_without_summaries(self, num_steps)
+      return output  # NOTE(review): unbound if neither branch above ran.
+
+    return wrapper
+
+  if args and callable(args[0]):  # Used bare: first arg is the function.
+    train_fn, args = args[0], args[1:]  # Remaining args go to `tf.function`.
+    return decorator(train_fn)
+  return decorator
+
+
+def get_value(x) -> np.ndarray:
+  """Returns the value of a variable/tensor.
+
+  Args:
+    x: A tensor-like input (per `tf.is_tensor`), or any other value.
+
+  Returns:
+    `x.numpy()` if `tf.is_tensor(x)` is true; otherwise `x` itself, unchanged.
+  """
+  if not tf.is_tensor(x):
+    return x
+  return x.numpy()
--- a/research/skip_thoughts/skip_thoughts/data/special_words.py
+++ b/research/skip_thoughts/skip_thoughts/data/special_words.py
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The Orbit Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,16 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Special word constants.
+"""Tests for orbit.utils."""

-NOTE: The ids of the EOS and UNK constants should not be modified. It is assumed
-that these always occupy the first two ids.
-"""
+from orbit import utils

-# End of sentence.
-EOS = "<eos>"
-EOS_ID = 0
+import tensorflow as tf

-# Unknown.
-UNK = "<unk>"
-UNK_ID = 1
+
+class UtilsTest(tf.test.TestCase):
+
+  def test_create_global_step(self):
+    step = utils.create_global_step()
+    self.assertEqual(step.dtype, tf.int64)
+    self.assertEqual(step, 0)  # Expected initial value of a fresh step.
+    step.assign_add(1)
+    self.assertEqual(step, 1)  # The increment above must be observable.
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/README.md
+++ b/research/README.md
@@ -7,118 +7,73 @@ This directory contains code implementations and pre-trained models of published
 The research models are maintained by their respective authors.

 ## Table of Contents
- [Modeling Libraries and Models](#modeling-libraries-and-models)
- [Models and Implementations](#models-and-implementations)
-  * [Computer Vision](#computer-vision)
-  * [Natural Language Processing](#natural-language-processing)
-  * [Audio and Speech](#audio-and-speech)
-  * [Reinforcement Learning](#reinforcement-learning)
-  * [Others](#others)
- [Archived Models and Implementations](#warning-archived-models-and-implementations) (:no_entry_sign: No longer maintained)
+- [TensorFlow Research Models](#tensorflow-research-models)
+  - [Table of Contents](#table-of-contents)
+  - [Modeling Libraries and Models](#modeling-libraries-and-models)
+  - [Models and Implementations](#models-and-implementations)
+    - [Computer Vision](#computer-vision)
+    - [Natural Language Processing](#natural-language-processing)
+    - [Audio and Speech](#audio-and-speech)
+    - [Reinforcement Learning](#reinforcement-learning)
+    - [Others](#others)
+    - [Old Models and Implementations in TensorFlow 1](#old-models-and-implementations-in-tensorflow-1)
+  - [Contributions](#contributions)

 ## Modeling Libraries and Models

 | Directory | Name | Description | Maintainer(s) |
 |-----------|------|-------------|---------------|
-| [object_detection](object_detection) | TensorFlow Object Detection API | A framework that makes it easy to construct, train and deploy object detection models<br /><br />A collection of object detection models pre-trained on the COCO dataset, the Kitti dataset, the Open Images dataset, the AVA v2.1 dataset, and the iNaturalist Species Detection Dataset| @jch1, @tombstone, @pkulzc |
-| [slim](slim) | TensorFlow-Slim Image Classification Model Library | A lightweight high-level API of TensorFlow for defining, training and evaluating image classification models <br />• Inception V1/V2/V3/V4<br />• Inception-ResNet-v2<br />• ResNet V1/V2<br />• VGG 16/19<br />• MobileNet V1/V2/V3<br />• NASNet-A_Mobile/Large<br />• PNASNet-5_Large/Mobile | @sguada, @marksandler2 |
+| [object_detection](object_detection) | TensorFlow Object Detection API | A framework that makes it easy to construct, train and deploy object detection models<br /><br />A collection of object detection models pre-trained on the COCO dataset, the Kitti dataset, the Open Images dataset, the AVA v2.1 dataset, and the iNaturalist Species Detection Dataset| jch1, tombstone, pkulzc |
+| [slim](slim) | TensorFlow-Slim Image Classification Model Library | A lightweight high-level API of TensorFlow for defining, training and evaluating image classification models <br />• Inception V1/V2/V3/V4<br />• Inception-ResNet-v2<br />• ResNet V1/V2<br />• VGG 16/19<br />• MobileNet V1/V2/V3<br />• NASNet-A_Mobile/Large<br />• PNASNet-5_Large/Mobile | sguada, marksandler2 |

 ## Models and Implementations

 ### Computer Vision

-| Directory | Referenece (Paper) | Maintainer(s) |
-|-----------|--------------------|---------------|
-| [attention_ocr](attention_ocr) | [Attention-based Extraction of Structured Information from Street View Imagery](https://arxiv.org/abs/1704.03549) | xavigibert |
-| [autoaugment](autoaugment) | [1] [AutoAugment](https://arxiv.org/abs/1805.09501)<br />[2] [Wide Residual Networks](https://arxiv.org/abs/1605.07146)<br />[3] [Shake-Shake regularization](https://arxiv.org/abs/1705.07485)<br />[4] [ShakeDrop Regularization for Deep Residual Learning](https://arxiv.org/abs/1802.02375) | barretzoph |
-| [deeplab](deeplab) | [1] [DeepLabv1](https://arxiv.org/abs/1412.7062)<br />[2] [DeepLabv2](https://arxiv.org/abs/1606.00915)<br />[3] [DeepLabv3](https://arxiv.org/abs/1802.02611)<br />[4] [DeepLabv3+](https://arxiv.org/abs/1706.05587) | aquariusjay, yknzhu |
-| [delf](delf)  | [1] DELF (DEep Local Features): [Large-Scale Image Retrieval with Attentive Deep Local Features](https://arxiv.org/abs/1612.06321)<br />[2] [Detect-to-Retrieve](https://arxiv.org/abs/1812.01584) | andrefaraujo |
-| [lstm_object_detection](lstm_object_detection) | [Mobile Video Object Detection with Temporally-Aware Feature Maps](https://arxiv.org/abs/1711.06368) | yinxiaoli, yongzhe2160, lzyuan |
-| [marco](marco) | [Classification of crystallization outcomes using deep convolutional neural networks](https://arxiv.org/abs/1803.10342) | vincentvanhoucke |
-| [vid2depth](vid2depth) | [Unsupervised Learning of Depth and Ego-Motion from Monocular Video Using 3D Geometric Constraints](https://arxiv.org/abs/1802.05522) | rezama |
+| Directory | Paper(s) | Conference | Maintainer(s) |
+|-----------|----------|------------|---------------|
+| [attention_ocr](attention_ocr) | [Attention-based Extraction of Structured Information from Street View Imagery](https://arxiv.org/abs/1704.03549) | ICDAR 2017 | xavigibert |
+| [autoaugment](autoaugment) | [1] [AutoAugment](https://arxiv.org/abs/1805.09501)<br />[2] [Wide Residual Networks](https://arxiv.org/abs/1605.07146)<br />[3] [Shake-Shake regularization](https://arxiv.org/abs/1705.07485)<br />[4] [ShakeDrop Regularization for Deep Residual Learning](https://arxiv.org/abs/1802.02375) | [1] CVPR 2019<br />[2] BMVC 2016<br /> [3] ICLR 2017<br /> [4] ICLR 2018 | barretzoph |
+| [deeplab](deeplab) | [1] [DeepLabv1: Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs](https://arxiv.org/abs/1412.7062)<br />[2] [DeepLabv2: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs](https://arxiv.org/abs/1606.00915)<br />[3] [DeepLabv3: Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587)<br />[4] [DeepLabv3+: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611)<br />| [1] ICLR 2015 <br />[2] TPAMI 2017 <br />[4] ECCV 2018 | aquariusjay, yknzhu |
+| [delf](delf)  | [1] DELF (DEep Local Features): [Large-Scale Image Retrieval with Attentive Deep Local Features](https://arxiv.org/abs/1612.06321)<br />[2] [Detect-to-Retrieve: Efficient Regional Aggregation for Image Search](https://arxiv.org/abs/1812.01584)<br />[3] DELG (DEep Local and Global features): [Unifying Deep Local and Global Features for Image Search](https://arxiv.org/abs/2001.05027)<br />[4] GLDv2: [Google Landmarks Dataset v2 -- A Large-Scale Benchmark for Instance-Level Recognition and Retrieval](https://arxiv.org/abs/2004.01804) | [1] ICCV 2017<br />[2] CVPR 2019<br />[4] CVPR 2020 | andrefaraujo |
+| [lstm_object_detection](lstm_object_detection) | [Mobile Video Object Detection with Temporally-Aware Feature Maps](https://arxiv.org/abs/1711.06368) | CVPR 2018 | yinxiaoli, yongzhe2160, lzyuan |
+| [marco](marco) | MARCO: [Classification of crystallization outcomes using deep convolutional neural networks](https://arxiv.org/abs/1803.10342) | | vincentvanhoucke |
+| [vid2depth](vid2depth) | [Unsupervised Learning of Depth and Ego-Motion from Monocular Video Using 3D Geometric Constraints](https://arxiv.org/abs/1802.05522) | CVPR 2018 | rezama |

 ### Natural Language Processing

-| Directory | Referenece (Paper) | Maintainer(s) |
-|-----------|--------------------|---------------|
-| [adversarial_text](adversarial_text) | [1] [Adversarial Training Methods for Semi-Supervised Text](https://arxiv.org/abs/1605.07725) Classification<br />[2] [Semi-supervised Sequence Learning](https://arxiv.org/abs/1511.01432) | rsepassi, a-dai |
-| [cvt_text](cvt_text) | [Semi-supervised sequence learning with cross-view training](https://arxiv.org/abs/1809.08370) | clarkkev, lmthang |
+| Directory | Paper(s) | Conference | Maintainer(s) |
+|-----------|----------|------------|---------------|
+| [adversarial_text](adversarial_text) | [1] [Adversarial Training Methods for Semi-Supervised Text Classification](https://arxiv.org/abs/1605.07725)<br />[2] [Semi-supervised Sequence Learning](https://arxiv.org/abs/1511.01432) | [1] ICLR 2017<br />[2] NIPS 2015 | rsepassi, a-dai |
+| [cvt_text](cvt_text) | [Semi-Supervised Sequence Modeling with Cross-View Training](https://arxiv.org/abs/1809.08370) | EMNLP 2018 | clarkkev, lmthang |

 ### Audio and Speech

-| Directory | Referenece (Paper) | Maintainer(s) |
-|-----------|--------------------|---------------|
-| [audioset](audioset) | [1] [AudioSet: A Large Scale Dataset of Audio Events](https://research.google/pubs/pub45857/)<br />[2] [CNN Architectures for Large-Scale Audio Classification](https://research.google/pubs/pub45611/) | plakal, dpwe |
+| Directory | Paper(s) | Conference | Maintainer(s) |
+|-----------|----------|------------|---------------|
+| [audioset](audioset) | [1] [Audio Set: An ontology and human-labeled dataset for audio events](https://research.google/pubs/pub45857/)<br />[2] [CNN Architectures for Large-Scale Audio Classification](https://research.google/pubs/pub45611/) | ICASSP 2017 | plakal, dpwe |
+| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | ICLR 2016 | yhliang2018 |

 ### Reinforcement Learning

-| Directory | Referenece (Paper) | Maintainer(s) |
-|-----------|--------------------|---------------|
-| [efficient-hrl](efficient-hrl) | [1] [Data-Efficient Hierarchical Reinforcement Learning](https://arxiv.org/abs/1805.08296)<br />[2] [Near-Optimal Representation Learning for Hierarchical Reinforcement Learning](https://arxiv.org/abs/1810.01257) | ofirnachum |
-| [pcl_rl](pcl_rl) | [1] [Improving Policy Gradient by Exploring Under-appreciated Rewards](https://arxiv.org/abs/1611.09321)<br />[2] [Bridging the Gap Between Value and Policy Based Reinforcement Learning](https://arxiv.org/abs/1702.08892)<br />[3] [Trust-PCL: An Off-Policy Trust Region Method for Continuous Control](https://arxiv.org/abs/1707.01891) | ofirnachum |
+| Directory | Paper(s) | Conference | Maintainer(s) |
+|-----------|----------|------------|---------------|
+| [efficient-hrl](efficient-hrl) | [1] [Data-Efficient Hierarchical Reinforcement Learning](https://arxiv.org/abs/1805.08296)<br />[2] [Near-Optimal Representation Learning for Hierarchical Reinforcement Learning](https://arxiv.org/abs/1810.01257) | [1] NIPS 2018<br /> [2] ICLR 2019 | ofirnachum |
+| [pcl_rl](pcl_rl) | [1] [Improving Policy Gradient by Exploring Under-appreciated Rewards](https://arxiv.org/abs/1611.09321)<br />[2] [Bridging the Gap Between Value and Policy Based Reinforcement Learning](https://arxiv.org/abs/1702.08892)<br />[3] [Trust-PCL: An Off-Policy Trust Region Method for Continuous Control](https://arxiv.org/abs/1707.01891) | [1] ICLR 2017<br />[2] NIPS 2017<br />[3] ICLR 2018 | ofirnachum |

 ### Others

-| Directory | Referenece (Paper) | Maintainer(s) |
-|-----------|--------------------|---------------|
-| [lfads](lfads) | [LFADS - Latent Factor Analysis via Dynamical Systems](https://doi.org/10.1101/152884) | jazcollins, sussillo |
-| [rebar](rebar) | [REBAR: Low-variance, unbiased gradient estimates for discrete latent variable models](https://arxiv.org/abs/1703.07370) | gjtucker |
+| Directory | Paper(s) | Conference | Maintainer(s) |
+|-----------|----------|------------|---------------|
+| [lfads](lfads) | [LFADS - Latent Factor Analysis via Dynamical Systems](https://arxiv.org/abs/1608.06315) | | jazcollins, sussillo |
+| [rebar](rebar) | [REBAR: Low-variance, unbiased gradient estimates for discrete latent variable models](https://arxiv.org/abs/1703.07370) | NIPS 2017 | gjtucker |

---
+### Old Models and Implementations in TensorFlow 1

-## :warning: Archived Models and Implementations
-
-The following research models are no longer maintained.
-
-**Note**: We will remove archived models from the master branch in June, 2020. 
-After removal, you will still be able to access archived models in the archive branch.
-
-| Directory | Referenece (Paper) | Maintainer(s) |
-|-----------|--------------------|---------------|
-| [adv_imagenet_models](adv_imagenet_models) | [1] [Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)<br />[2] [Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204) | alexeykurakin |
-| [adversarial_crypto](adversarial_crypto) | [Learning to Protect Communications with Adversarial Neural Cryptography](https://arxiv.org/abs/1610.06918) | dave-andersen |
-| [adversarial_logit_pairing](adversarial_logit_pairing) | [Adversarial Logit Pairing](https://arxiv.org/abs/1803.06373) | alexeykurakin |
-| [autoencoder](autoencoder) | Various autoencoders | snurkabill |
-| [brain_coder](brain_coder) | [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526) | danabo, mnorouzi |
-| [cognitive_mapping_and_planning](cognitive_mapping_and_planning) | [Cognitive Mapping and Planning for Visual Navigation](https://arxiv.org/abs/1702.03920) | s-gupta |
-| [compression](compression) | [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148) | nmjohn |
-| [deep_contextual_bandits](deep_contextual_bandits) | [Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling](https://arxiv.org/abs/1802.09127) | rikel |
-| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | yhliang2018 |
-| [domain_adaptation](domain_adaptation) | [1] [Domain Separation Networks](https://arxiv.org/abs/1608.06019) <br />[2] [Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks](https://arxiv.org/abs/1612.05424) | bousmalis, dmrd |
-| [feelvos](feelvos)| [FEELVOS](https://arxiv.org/abs/1902.09513) | pvoigtlaender, yuningchai, aquariusjay |
-| [fivo](fivo)| [Filtering variational objectives for training generative sequence models](https://arxiv.org/abs/1705.09279) | dieterichlawson |
-| [global_objectives](global_objectives) | [Scalable Learning of Non-Decomposable Objectives](https://arxiv.org/abs/1608.04802) | mackeya-google |
-| [im2txt](im2txt) | [Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning Challenge](https://arxiv.org/abs/1609.06647) | cshallue |
-| [inception](inception) | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | shlens, vincentvanhoucke |
-| [keypointnet](keypointnet) | [KeypointNet](https://arxiv.org/abs/1807.03146) | mnorouzi |
-| [learned_optimizer](learned_optimizer) | [Learned Optimizers that Scale and Generalize](https://arxiv.org/abs/1703.04813) | olganw, nirum |
-| [learning_to_remember_rare_events](learning_to_remember_rare_events) | [Learning to Remember Rare Events](https://arxiv.org/abs/1703.03129) | lukaszkaiser, ofirnachum |
-| [learning_unsupervised_learning](learning_unsupervised_learning) | [Meta-Learning Update Rules for Unsupervised Representation Learning](https://arxiv.org/abs/1804.00222) | lukemetz, nirum |
-| [lexnet_nc](lexnet_nc) | [Olive Oil is Made of Olives, Baby Oil is Made for Babies: Interpreting Noun Compounds using Paraphrases in a Neural Model](https://arxiv.org/abs/1803.08073) | vered1986, waterson |
-| [lm_1b](lm_1b) | [Exploring the Limits of Language Modeling](https://arxiv.org/abs/1602.02410) | oriolvinyals, panyx0718 |
-| [lm_commonsense](lm_commonsense) | [A Simple Method for Commonsense Reasoning](https://arxiv.org/abs/1806.02847) | thtrieu |
-| [maskgan](maskgan)| [MaskGAN: Better Text Generation via Filling in the______](https://arxiv.org/abs/1801.07736) | liamb315, a-dai |
-| [namignizer](namignizer)| Namignizer | knathanieltucker |
-| [neural_gpu](neural_gpu)| [Neural GPUs Learn Algorithms](https://arxiv.org/abs/1511.08228) | lukaszkaiser |
-| [neural_programmer](neural_programmer) | [Learning a Natural Language Interface with Neural Programmer](https://arxiv.org/abs/1611.08945) | arvind2505 |
-| [next_frame_prediction](next_frame_prediction) | [Visual Dynamics](https://arxiv.org/abs/1607.02586) | panyx0718 |
-| [ptn](ptn) | [Perspective Transformer Nets](https://arxiv.org/abs/1612.00814) | xcyan, arkanath, hellojas, honglaklee |
-| [qa_kg](qa_kg) | [Learning to Reason](https://arxiv.org/abs/1704.05526) | yuyuz |
-| [real_nvp](real_nvp) | [Density estimation using Real NVP](https://arxiv.org/abs/1605.08803) | laurent-dinh |
-| [sentiment_analysis](sentiment_analysis)| [Effective Use of Word Order for Text Categorization with Convolutional Neural Networks](https://arxiv.org/abs/1412.1058) | sculd |
-| [seq2species](seq2species) | [Seq2Species: A deep learning approach to pattern recognition for short DNA sequences](https://doi.org/10.1101/353474) | apbusia, depristo |
-| [skip_thoughts](skip_thoughts) | [Skip-Thought Vectors](https://arxiv.org/abs/1506.06726) | cshallue |
-| [steve](steve) | [Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion](https://arxiv.org/abs/1807.01675) | buckman-google |
-| [street](street) | [End-to-End Interpretation of the French Street Name Signs Dataset](https://arxiv.org/abs/1702.03970) | theraysmith |
-| [struct2depth](struct2depth)| [Depth Prediction Without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos](https://arxiv.org/abs/1811.06152) | aneliaangelova |
-| [swivel](swivel) | [Swivel: Improving Embeddings by Noticing What's Missing](https://arxiv.org/abs/1602.02215) | waterson |
-| [tcn](tcn) | [Time-Contrastive Networks: Self-Supervised Learning from Video](https://arxiv.org/abs/1704.06888) | coreylynch, sermanet |
-| [textsum](textsum)| [A Neural Attention Model for Abstractive Sentence Summarization](https://arxiv.org/abs/1509.00685) | panyx0718, peterjliu |
-| [transformer](transformer) | [Spatial Transformer Network](https://arxiv.org/abs/1506.02025) | daviddao|
-| [video_prediction](video_prediction) | [Unsupervised Learning for Physical Interaction through Video Prediction](https://arxiv.org/abs/1605.07157) | cbfinn |
+:warning: If you are looking for old models, please visit the [Archive branch](https://github.com/tensorflow/models/tree/archive/research).

 ---

 ## Contributions

-If you want to contribute, please review the [contribution guidelines](../../../wiki/How-to-contribute).
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).