Commit 1f4747a4 authored by pkulzc

Merge remote-tracking branch 'upstream/master'

parents d2d01f4f a7aa25d3
......@@ -28,4 +28,6 @@ If you would like to make any fixes or improvements to the models, please [submi
The *Official Models* are made available as a Python module. To run the models and associated scripts, add the top-level ***/models*** folder to the Python path with the command: `export PYTHONPATH="$PYTHONPATH:/path/to/models"`
To install dependencies, pass `-r official/requirements.txt` to pip (i.e. `pip3 install --user -r official/requirements.txt`).
To make Official Models easier to use, we are planning to create a pip installable Official Models package. This is being tracked in [#917](https://github.com/tensorflow/models/issues/917).
......@@ -15,7 +15,7 @@
"description": "The date when the test of the model is started",
"mode": "REQUIRED",
"name": "run_date",
"type": "DATETIME"
"type": "TIMESTAMP"
},
{
"description": "The tensorflow version information.",
......@@ -58,7 +58,7 @@
"type": "RECORD"
},
{
"description": "Enviornment variables when the benchmark run is executed.",
"description": "Environment variables when the benchmark run is executed.",
"fields": [
{
"description": "The name of the variable.",
......@@ -74,7 +74,27 @@
}
],
"mode": "REPEATED",
"name": "enviornment_variable",
"name": "environment_variable",
"type": "RECORD"
},
{
"description": "TF Environment variables when the benchmark run is executed.",
"fields": [
{
"description": "The name of the variable.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The value of the variable.",
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "tensorflow_environment_variables",
"type": "RECORD"
},
{
......
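For orientation, a benchmark_run record that satisfies this schema, as assembled by the BenchmarkLogger changes later in this commit, might look roughly like the following Python dict (all values are illustrative):

run_info = {
    "model_name": "resnet",
    # TIMESTAMP-compatible string produced with "%Y-%m-%dT%H:%M:%S.%fZ".
    "run_date": "2018-04-18T17:25:43.123456Z",
    "tensorflow_version": {"version": "1.8.0", "git_hash": "v1.8.0-0-g0000000"},
    "environment_variable": [
        {"name": "PATH", "value": "/usr/local/bin:/usr/bin"}],
    "tensorflow_environment_variables": [
        {"name": "TF_ENABLE_WINOGRAD_NONFUSED", "value": "1"}],
    "machine_config": {
        "cpu_info": {"num_cores": 8, "cpu_info": "Intel(R) Xeon(R) CPU",
                     "mhz_per_cpu": 2300.0},
        "gpu_info": {"count": 1, "model": "Tesla P100-PCIE-16GB"},
        "memory_total": 64 * 1024 ** 3,
        "memory_available": 48 * 1024 ** 3,
    },
}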
......@@ -175,11 +175,14 @@ def validate_batch_size_for_multi_gpu(batch_size):
raise ValueError(err)
def main(_):
def main(argv):
parser = MNISTArgParser()
flags = parser.parse_args(args=argv[1:])
model_function = model_fn
if FLAGS.multi_gpu:
validate_batch_size_for_multi_gpu(FLAGS.batch_size)
if flags.multi_gpu:
validate_batch_size_for_multi_gpu(flags.batch_size)
# There are two steps required if using multi-GPU: (1) wrap the model_fn,
# and (2) wrap the optimizer. The first happens here, and (2) happens
......@@ -187,16 +190,16 @@ def main(_):
model_function = tf.contrib.estimator.replicate_model_fn(
model_fn, loss_reduction=tf.losses.Reduction.MEAN)
data_format = FLAGS.data_format
data_format = flags.data_format
if data_format is None:
data_format = ('channels_first'
if tf.test.is_built_with_cuda() else 'channels_last')
mnist_classifier = tf.estimator.Estimator(
model_fn=model_function,
model_dir=FLAGS.model_dir,
model_dir=flags.model_dir,
params={
'data_format': data_format,
'multi_gpu': FLAGS.multi_gpu
'multi_gpu': flags.multi_gpu
})
# Set up training and evaluation input functions.
......@@ -206,35 +209,35 @@ def main(_):
# When choosing shuffle buffer sizes, larger sizes result in better
# randomness, while smaller sizes use less memory. MNIST is a small
# enough dataset that we can easily shuffle the full epoch.
ds = dataset.train(FLAGS.data_dir)
ds = ds.cache().shuffle(buffer_size=50000).batch(FLAGS.batch_size)
ds = dataset.train(flags.data_dir)
ds = ds.cache().shuffle(buffer_size=50000).batch(flags.batch_size)
# Iterate through the dataset a set number (`epochs_between_evals`) of times
# during each training session.
ds = ds.repeat(FLAGS.epochs_between_evals)
ds = ds.repeat(flags.epochs_between_evals)
return ds
def eval_input_fn():
return dataset.test(FLAGS.data_dir).batch(
FLAGS.batch_size).make_one_shot_iterator().get_next()
return dataset.test(flags.data_dir).batch(
flags.batch_size).make_one_shot_iterator().get_next()
# Set up hook that outputs training logs every 100 steps.
train_hooks = hooks_helper.get_train_hooks(
FLAGS.hooks, batch_size=FLAGS.batch_size)
flags.hooks, batch_size=flags.batch_size)
# Train and evaluate model.
for _ in range(FLAGS.train_epochs // FLAGS.epochs_between_evals):
for _ in range(flags.train_epochs // flags.epochs_between_evals):
mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print('\nEvaluation results:\n\t%s\n' % eval_results)
# Export the model
if FLAGS.export_dir is not None:
if flags.export_dir is not None:
image = tf.placeholder(tf.float32, [None, 28, 28])
input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
'image': image,
})
mnist_classifier.export_savedmodel(FLAGS.export_dir, input_fn)
mnist_classifier.export_savedmodel(flags.export_dir, input_fn)
class MNISTArgParser(argparse.ArgumentParser):
......@@ -243,14 +246,9 @@ class MNISTArgParser(argparse.ArgumentParser):
def __init__(self):
super(MNISTArgParser, self).__init__(parents=[
parsers.BaseParser(),
parsers.ImageModelParser()])
self.add_argument(
'--export_dir',
type=str,
help='[default: %(default)s] If set, a SavedModel serialization of the '
'model will be exported to this directory at the end of training. '
'See the README for more details and relevant links.')
parsers.ImageModelParser(),
parsers.ExportParser(),
])
self.set_defaults(
data_dir='/tmp/mnist_data',
......@@ -261,6 +259,4 @@ class MNISTArgParser(argparse.ArgumentParser):
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
parser = MNISTArgParser()
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
main(argv=sys.argv)
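The change above drops the module-level FLAGS global in favor of an argparse namespace built inside main(argv). A minimal sketch of the same pattern, using a hypothetical ToyArgParser composed from the shared parser mix-ins:

import argparse
import sys

import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.utils.arg_parsers import parsers


class ToyArgParser(argparse.ArgumentParser):
  """Hypothetical parser built from the shared parser mix-ins."""

  def __init__(self):
    super(ToyArgParser, self).__init__(parents=[
        parsers.BaseParser(),
        parsers.ImageModelParser(),
        parsers.ExportParser(),
    ])
    self.set_defaults(data_dir='/tmp/toy_data', model_dir='/tmp/toy_model')


def main(argv):
  # Flags live in a local namespace rather than a global FLAGS object.
  flags = ToyArgParser().parse_args(args=argv[1:])
  print('Would train for %d epochs on data in %s' %
        (flags.train_epochs, flags.data_dir))


if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
  main(argv=sys.argv)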
......@@ -38,8 +38,6 @@ from official.mnist import dataset as mnist_dataset
from official.mnist import mnist
from official.utils.arg_parsers import parsers
FLAGS = None
def loss(logits, labels):
return tf.reduce_mean(
......@@ -97,35 +95,38 @@ def test(model, dataset):
tf.contrib.summary.scalar('accuracy', accuracy.result())
def main(_):
def main(argv):
parser = MNISTEagerArgParser()
flags = parser.parse_args(args=argv[1:])
tfe.enable_eager_execution()
# Automatically determine device and data_format
(device, data_format) = ('/gpu:0', 'channels_first')
if FLAGS.no_gpu or tfe.num_gpus() <= 0:
if flags.no_gpu or tfe.num_gpus() <= 0:
(device, data_format) = ('/cpu:0', 'channels_last')
# If data_format is defined in FLAGS, overwrite automatically set value.
if FLAGS.data_format is not None:
if flags.data_format is not None:
data_format = flags.data_format
print('Using device %s, and data format %s.' % (device, data_format))
# Load the datasets
train_ds = mnist_dataset.train(FLAGS.data_dir).shuffle(60000).batch(
FLAGS.batch_size)
test_ds = mnist_dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)
train_ds = mnist_dataset.train(flags.data_dir).shuffle(60000).batch(
flags.batch_size)
test_ds = mnist_dataset.test(flags.data_dir).batch(flags.batch_size)
# Create the model and optimizer
model = mnist.Model(data_format)
optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)
optimizer = tf.train.MomentumOptimizer(flags.lr, flags.momentum)
# Create file writers for writing TensorBoard summaries.
if FLAGS.output_dir:
if flags.output_dir:
# Create directories to which summaries will be written
# tensorboard --logdir=<output_dir>
# can then be used to see the recorded summaries.
train_dir = os.path.join(FLAGS.output_dir, 'train')
test_dir = os.path.join(FLAGS.output_dir, 'eval')
tf.gfile.MakeDirs(FLAGS.output_dir)
train_dir = os.path.join(flags.output_dir, 'train')
test_dir = os.path.join(flags.output_dir, 'eval')
tf.gfile.MakeDirs(flags.output_dir)
else:
train_dir = None
test_dir = None
......@@ -135,19 +136,19 @@ def main(_):
test_dir, flush_millis=10000, name='test')
# Create and restore checkpoint (if one exists on the path)
checkpoint_prefix = os.path.join(FLAGS.model_dir, 'ckpt')
checkpoint_prefix = os.path.join(flags.model_dir, 'ckpt')
step_counter = tf.train.get_or_create_global_step()
checkpoint = tfe.Checkpoint(
model=model, optimizer=optimizer, step_counter=step_counter)
# Restore variables on creation if a checkpoint exists.
checkpoint.restore(tf.train.latest_checkpoint(FLAGS.model_dir))
checkpoint.restore(tf.train.latest_checkpoint(flags.model_dir))
# Train and evaluate for a set number of epochs.
with tf.device(device):
for _ in range(FLAGS.train_epochs):
for _ in range(flags.train_epochs):
start = time.time()
with summary_writer.as_default():
train(model, optimizer, train_ds, step_counter, FLAGS.log_interval)
train(model, optimizer, train_ds, step_counter, flags.log_interval)
end = time.time()
print('\nTrain time for epoch #%d (%d total steps): %f' %
(checkpoint.save_counter.numpy() + 1,
......@@ -205,6 +206,4 @@ class MNISTEagerArgParser(argparse.ArgumentParser):
)
if __name__ == '__main__':
parser = MNISTEagerArgParser()
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
main(argv=sys.argv)
......@@ -46,6 +46,10 @@ tf.flags.DEFINE_string(
"metadata.")
# Model specific parameters
tf.flags.DEFINE_string(
"master", default=None,
help="GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You "
"must specify either this flag or --tpu.")
tf.flags.DEFINE_string("data_dir", "",
"Path to directory containing the MNIST dataset")
tf.flags.DEFINE_string("model_dir", None, "Estimator model_dir")
......@@ -132,11 +136,24 @@ def main(argv):
del argv # Unused.
tf.logging.set_verbosity(tf.logging.INFO)
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
if FLAGS.master is None and FLAGS.tpu is None:
raise RuntimeError('You must specify either --master or --tpu.')
if FLAGS.master is not None:
if FLAGS.tpu is not None:
tf.logging.warn('Both --master and --tpu are set. Ignoring '
'--tpu and using --master.')
tpu_grpc_url = FLAGS.master
else:
tpu_cluster_resolver = (
tf.contrib.cluster_resolver.TPUClusterResolver(
FLAGS.tpu,
zone=FLAGS.tpu_zone,
project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master()
run_config = tf.contrib.tpu.RunConfig(
cluster=tpu_cluster_resolver,
master=tpu_grpc_url,
evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir,
session_config=tf.ConfigProto(
allow_soft_placement=True, log_device_placement=True),
......
psutil>=5.4.3
py-cpuinfo>=3.3.0
google-cloud-bigquery>=0.31.0
\ No newline at end of file
......@@ -51,4 +51,13 @@ The model will begin training and will automatically evaluate itself on the vali
Note that there are a number of other options you can specify, including `--model_dir` to choose where to store the model and `--resnet_size` to choose the model size (options include ResNet-18 through ResNet-200). See [`resnet.py`](resnet.py) for the full list of options.
### Pre-trained model
You can download a 190 MB pre-trained version of ResNet-50 achieving 75.3% top-1 single-crop accuracy here: [resnet50_2017_11_30.tar.gz](http://download.tensorflow.org/models/official/resnet50_2017_11_30.tar.gz). Simply download and uncompress the file, and point the model to the extracted directory using the `--model_dir` flag.
You can download 190 MB pre-trained versions of ResNet-50 achieving 76.3% and 75.3% (respectively) top-1 single-crop accuracy here: [resnetv2_imagenet_checkpoint.tar.gz](http://download.tensorflow.org/models/official/resnetv2_imagenet_checkpoint.tar.gz), [resnetv1_imagenet_checkpoint.tar.gz](http://download.tensorflow.org/models/official/resnetv1_imagenet_checkpoint.tar.gz). Simply download and uncompress the file, and point the model to the extracted directory using the `--model_dir` flag.
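For example, assuming the v2 archive was extracted to `/tmp/resnetv2_imagenet_checkpoint`, training can resume from it with something like `python imagenet_main.py --data_dir=/path/to/imagenet --model_dir=/tmp/resnetv2_imagenet_checkpoint` (paths illustrative).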
Other versions and formats:
* [ResNet-v2-ImageNet Checkpoint](http://download.tensorflow.org/models/official/resnetv2_imagenet_checkpoint.tar.gz)
* [ResNet-v2-ImageNet SavedModel](http://download.tensorflow.org/models/official/resnetv2_imagenet_savedmodel.tar.gz)
* [ResNet-v2-ImageNet Frozen Graph](http://download.tensorflow.org/models/official/resnetv2_imagenet_frozen_graph.pb)
* [ResNet-v1-ImageNet Checkpoint](http://download.tensorflow.org/models/official/resnetv1_imagenet_checkpoint.tar.gz)
* [ResNet-v1-ImageNet SavedModel](http://download.tensorflow.org/models/official/resnetv1_imagenet_savedmodel.tar.gz)
* [ResNet-v1-ImageNet Frozen Graph](http://download.tensorflow.org/models/official/resnetv1_imagenet_frozen_graph.pb)
......@@ -228,7 +228,10 @@ def main(argv):
flags = parser.parse_args(args=argv[1:])
input_function = flags.use_synthetic_data and get_synth_input_fn() or input_fn
resnet_run_loop.resnet_main(flags, cifar10_model_fn, input_function)
resnet_run_loop.resnet_main(
flags, cifar10_model_fn, input_function,
shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
if __name__ == '__main__':
......
......@@ -305,7 +305,10 @@ def main(argv):
flags = parser.parse_args(args=argv[1:])
input_function = flags.use_synthetic_data and get_synth_input_fn() or input_fn
resnet_run_loop.resnet_main(flags, imagenet_model_fn, input_function)
resnet_run_loop.resnet_main(
flags, imagenet_model_fn, input_function,
shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
if __name__ == '__main__':
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test that the definitions of ResNet layers haven't changed.
These tests will fail if either:
a) The graph of a resnet layer changes and the change is significant enough
that it can no longer load existing checkpoints.
b) The numerical results produced by the layer change.
A warning will be issued if the graph changes, but the checkpoint still loads.
In the event that a layer change is intended, or the TensorFlow implementation
of a layer changes (and thus changes the graph), regenerate using the command:
$ python3 layer_test.py -regen
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet import resnet_model
from official.utils.testing import reference_data
DATA_FORMAT = "channels_last" # CPU instructions often preclude channels_first
BATCH_SIZE = 32
BLOCK_TESTS = [
dict(bottleneck=True, projection=True, version=1, width=8, channels=4),
dict(bottleneck=True, projection=True, version=2, width=8, channels=4),
dict(bottleneck=True, projection=False, version=1, width=8, channels=4),
dict(bottleneck=True, projection=False, version=2, width=8, channels=4),
dict(bottleneck=False, projection=True, version=1, width=8, channels=4),
dict(bottleneck=False, projection=True, version=2, width=8, channels=4),
dict(bottleneck=False, projection=False, version=1, width=8, channels=4),
dict(bottleneck=False, projection=False, version=2, width=8, channels=4),
]
class BaseTest(reference_data.BaseTest):
"""Tests for core ResNet layers."""
@property
def test_name(self):
return "resnet"
def _batch_norm_ops(self, test=False):
name = "batch_norm"
g = tf.Graph()
with g.as_default():
tf.set_random_seed(self.name_to_seed(name))
input_tensor = tf.get_variable(
"input_tensor", dtype=tf.float32,
initializer=tf.random_uniform((32, 16, 16, 3), maxval=1)
)
layer = resnet_model.batch_norm(
inputs=input_tensor, data_format=DATA_FORMAT, training=True)
self._save_or_test_ops(
name=name, graph=g, ops_to_eval=[input_tensor, layer], test=test,
correctness_function=self.default_correctness_function
)
def make_projection(self, filters_out, strides, data_format):
"""1D convolution with stride projector.
Args:
filters_out: Number of filters in the projection.
strides: Stride length for convolution.
data_format: channels_first or channels_last
Returns:
A CNN projector function with kernel_size 1.
"""
def projection_shortcut(inputs):
return resnet_model.conv2d_fixed_padding(
inputs=inputs, filters=filters_out, kernel_size=1, strides=strides,
data_format=data_format)
return projection_shortcut
def _resnet_block_ops(self, test, batch_size, bottleneck, projection,
version, width, channels):
"""Test whether resnet block construction has changed.
Args:
test: Whether or not to run as a test case.
batch_size: Number of fake images in the batch. This is needed due to
batch normalization.
bottleneck: Whether or not to use bottleneck layers.
projection: Whether or not to project the input.
version: Which version of ResNet to test.
width: The width of the fake image.
channels: The number of channels in the fake image.
"""
name = "batch-size-{}_{}{}_version-{}_width-{}_channels-{}".format(
batch_size,
"bottleneck" if bottleneck else "building",
"_projection" if projection else "",
version,
width,
channels
)
if version == 1:
block_fn = resnet_model._building_block_v1
if bottleneck:
block_fn = resnet_model._bottleneck_block_v1
else:
block_fn = resnet_model._building_block_v2
if bottleneck:
block_fn = resnet_model._bottleneck_block_v2
g = tf.Graph()
with g.as_default():
tf.set_random_seed(self.name_to_seed(name))
strides = 1
channels_out = channels
projection_shortcut = None
if projection:
strides = 2
channels_out *= strides
projection_shortcut = self.make_projection(
filters_out=channels_out, strides=strides, data_format=DATA_FORMAT)
filters = channels_out
if bottleneck:
filters = channels_out // 4
input_tensor = tf.get_variable(
"input_tensor", dtype=tf.float32,
initializer=tf.random_uniform((batch_size, width, width, channels),
maxval=1)
)
layer = block_fn(inputs=input_tensor, filters=filters, training=True,
projection_shortcut=projection_shortcut, strides=strides,
data_format=DATA_FORMAT)
self._save_or_test_ops(
name=name, graph=g, ops_to_eval=[input_tensor, layer], test=test,
correctness_function=self.default_correctness_function
)
def test_batch_norm(self):
self._batch_norm_ops(test=True)
def test_block_0(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[0])
def test_block_1(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[1])
def test_block_2(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[2])
def test_block_3(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[3])
def test_block_4(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[4])
def test_block_5(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[5])
def test_block_6(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[6])
def test_block_7(self):
self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[7])
def regenerate(self):
"""Create reference data files for ResNet layer tests."""
self._batch_norm_ops(test=False)
for block_params in BLOCK_TESTS:
self._resnet_block_ops(test=False, batch_size=BATCH_SIZE, **block_params)
if __name__ == "__main__":
reference_data.main(argv=sys.argv, test_class=BaseTest)
......@@ -30,7 +30,9 @@ import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet import resnet_model
from official.utils.arg_parsers import parsers
from official.utils.export import export
from official.utils.logging import hooks_helper
from official.utils.logging import logger
################################################################################
......@@ -218,7 +220,13 @@ def resnet_model_fn(features, labels, mode, model_class,
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Return the predictions and the specification for serving a SavedModel
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
export_outputs={
'predict': tf.estimator.export.PredictOutput(predictions)
})
# Calculate loss, which includes softmax cross entropy and L2 regularization.
cross_entropy = tf.losses.softmax_cross_entropy(
......@@ -309,8 +317,20 @@ def validate_batch_size_for_multi_gpu(batch_size):
raise ValueError(err)
def resnet_main(flags, model_function, input_function):
"""Shared main loop for ResNet Models."""
def resnet_main(flags, model_function, input_function, shape=None):
"""Shared main loop for ResNet Models.
Args:
flags: FLAGS object that contains the params for running. See
ResnetArgParser for created flags.
model_function: the function that instantiates the Model and builds the
ops for train/eval. This will be passed directly into the estimator.
input_function: the function that processes the dataset and returns a
dataset that the estimator can train on. This will be wrapped with
all the relevant flags for running and passed to estimator.
shape: list of ints representing the shape of the images used for training.
This is only used if flags.export_dir is passed.
"""
# Using the Winograd non-fused algorithms provides a small performance boost.
os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
......@@ -347,9 +367,17 @@ def resnet_main(flags, model_function, input_function):
'version': flags.version,
})
if flags.benchmark_log_dir is not None:
benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
benchmark_logger.log_run_info("resnet")
else:
benchmark_logger = None
for _ in range(flags.train_epochs // flags.epochs_between_evals):
train_hooks = hooks_helper.get_train_hooks(
flags.hooks, batch_size=flags.batch_size)
flags.hooks,
batch_size=flags.batch_size,
benchmark_log_dir=flags.benchmark_log_dir)
print('Starting a training cycle.')
......@@ -377,16 +405,38 @@ def resnet_main(flags, model_function, input_function):
steps=flags.max_train_steps)
print(eval_results)
if benchmark_logger:
benchmark_logger.log_estimator_evaluation_result(eval_results)
if flags.export_dir is not None:
warn_on_multi_gpu_export(flags.multi_gpu)
# Exports a saved model for the given classifier.
input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
shape, batch_size=flags.batch_size)
classifier.export_savedmodel(flags.export_dir, input_receiver_fn)
def warn_on_multi_gpu_export(multi_gpu=False):
"""For the time being, multi-GPU mode does not play nicely with exporting."""
if multi_gpu:
tf.logging.warning(
'You are exporting a SavedModel while in multi-GPU mode. Note that '
'the resulting SavedModel will require the same GPUs to be available. '
'If you wish to serve the SavedModel from a different device, '
'try exporting the SavedModel with multi-GPU mode turned off.')
class ResnetArgParser(argparse.ArgumentParser):
"""Arguments for configuring and running a Resnet Model.
"""
"""Arguments for configuring and running a Resnet Model."""
def __init__(self, resnet_size_choices=None):
super(ResnetArgParser, self).__init__(parents=[
parsers.BaseParser(),
parsers.PerformanceParser(),
parsers.ImageModelParser(),
parsers.ExportParser(),
parsers.BenchmarkParser(),
])
self.add_argument(
......
......@@ -131,7 +131,7 @@ class BaseParser(argparse.ArgumentParser):
"of train hooks. "
"Example: --hooks LoggingTensorHook ExamplesPerSecondHook. "
"Allowed hook names (case-insensitive): LoggingTensorHook, "
"ProfilerHook, ExamplesPerSecondHook. "
"ProfilerHook, ExamplesPerSecondHook, LoggingMetricHook."
"See official.utils.logging.hooks_helper for details.",
metavar="<HK>"
)
......@@ -224,3 +224,70 @@ class ImageModelParser(argparse.ArgumentParser):
"was built for CPU or GPU.",
metavar="<CF>"
)
class ExportParser(argparse.ArgumentParser):
"""Parsing options for exporting saved models or other graph defs.
This is a separate parser for now, but should be made part of BaseParser
once all models are brought up to speed.
Args:
add_help: Create the "--help" flag. False if class instance is a parent.
export_dir: Create a flag to specify where a SavedModel should be exported.
"""
def __init__(self, add_help=False, export_dir=True):
super(ExportParser, self).__init__(add_help=add_help)
if export_dir:
self.add_argument(
"--export_dir", "-ed",
help="[default: %(default)s] If set, a SavedModel serialization of "
"the model will be exported to this directory at the end of "
"training. See the README for more details and relevant links.",
metavar="<ED>"
)
class BenchmarkParser(argparse.ArgumentParser):
"""Default parser for benchmark logging.
Args:
add_help: Create the "--help" flag. False if class instance is a parent.
benchmark_log_dir: Create a flag to specify location for benchmark logging.
bigquery_uploader: Create flags for uploading benchmark results to BigQuery.
"""
def __init__(self, add_help=False, benchmark_log_dir=True,
bigquery_uploader=True):
super(BenchmarkParser, self).__init__(add_help=add_help)
if benchmark_log_dir:
self.add_argument(
"--benchmark_log_dir", "-bld", default=None,
help="[default: %(default)s] The location of the benchmark logging.",
metavar="<BLD>"
)
if bigquery_uploader:
self.add_argument(
"--gcp_project", "-gp", default=None,
help="[default: %(default)s] The GCP project name where the benchmark"
" will be uploaded.",
metavar="<GP>"
)
self.add_argument(
"--bigquery_data_set", "-bds", default="test_benchmark",
help="[default: %(default)s] The Bigquery dataset name where the"
" benchmark will be uploaded.",
metavar="<BDS>"
)
self.add_argument(
"--bigquery_run_table", "-brt", default="benchmark_run",
help="[default: %(default)s] The Bigquery table name where the"
" benchmark run information will be uploaded.",
metavar="<BRT>"
)
self.add_argument(
"--bigquery_metric_table", "-bmt", default="benchmark_metric",
help="[default: %(default)s] The Bigquery table name where the"
" benchmark metric information will be uploaded.",
metavar="<BMT>"
)
......@@ -28,7 +28,8 @@ class TestParser(argparse.ArgumentParser):
parsers.BaseParser(),
parsers.PerformanceParser(num_parallel_calls=True, inter_op=True,
intra_op=True, use_synthetic_data=True),
parsers.ImageModelParser(data_format=True)
parsers.ImageModelParser(data_format=True),
parsers.BenchmarkParser(benchmark_log_dir=True, bigquery_uploader=True)
])
......@@ -58,6 +59,20 @@ class BaseTester(unittest.TestCase):
for key, value in defaults.items():
assert namespace_vars[key] == value
def test_benchmark_setting(self):
defaults = dict(
hooks=["LoggingMetricHook"],
benchmark_log_dir="/tmp/12345",
gcp_project="project_abc",
)
parser = TestParser()
parser.set_defaults(**defaults)
namespace_vars = vars(parser.parse_args([]))
for key, value in defaults.items():
assert namespace_vars[key] == value
def test_booleans(self):
"""Test to ensure boolean flags trigger as expected.
"""
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convenience functions for exporting models as SavedModels or other types."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def build_tensor_serving_input_receiver_fn(shape, dtype=tf.float32,
batch_size=1):
"""Returns a input_receiver_fn that can be used during serving.
This expects examples to come through as float tensors, and simply
wraps them as TensorServingInputReceivers.
Arguably, this should live in tf.estimator.export. Testing here first.
Args:
shape: list representing target size of a single example.
dtype: the expected datatype for the input example
batch_size: number of input tensors that will be passed for prediction
Returns:
A function that itself returns a TensorServingInputReceiver.
"""
def serving_input_receiver_fn():
# Prep a placeholder where the input example will be fed in
features = tf.placeholder(
dtype=dtype, shape=[batch_size] + shape, name='input_tensor')
return tf.estimator.export.TensorServingInputReceiver(
features=features, receiver_tensors=features)
return serving_input_receiver_fn
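A usage sketch, mirroring how resnet_run_loop.py calls this helper; the estimator argument is assumed to be an already trained tf.estimator.Estimator, and the shape is illustrative:

from official.utils.export import export


def export_as_saved_model(estimator, export_dir):
  """Exports `estimator` as a SavedModel that expects 28x28 float images."""
  # One single-example float tensor per request; adjust shape/batch_size to
  # match the model actually being served.
  input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
      shape=[28, 28], batch_size=1)
  estimator.export_savedmodel(export_dir, input_receiver_fn)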
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for exporting utils."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.export import export
class ExportUtilsTest(tf.test.TestCase):
"""Tests for the ExportUtils."""
def test_build_tensor_serving_input_receiver_fn(self):
receiver_fn = export.build_tensor_serving_input_receiver_fn(shape=[4, 5])
with tf.Graph().as_default():
receiver = receiver_fn()
self.assertIsInstance(
receiver, tf.estimator.export.TensorServingInputReceiver)
self.assertIsInstance(receiver.features, tf.Tensor)
self.assertEqual(receiver.features.shape, tf.TensorShape([1, 4, 5]))
self.assertEqual(receiver.features.dtype, tf.float32)
self.assertIsInstance(receiver.receiver_tensors, dict)
# Note that Python 3 can no longer index .values() directly; cast to list.
self.assertEqual(list(receiver.receiver_tensors.values())[0].shape,
tf.TensorShape([1, 4, 5]))
def test_build_tensor_serving_input_receiver_fn_batch_dtype(self):
receiver_fn = export.build_tensor_serving_input_receiver_fn(
shape=[4, 5], dtype=tf.int8, batch_size=10)
with tf.Graph().as_default():
receiver = receiver_fn()
self.assertIsInstance(
receiver, tf.estimator.export.TensorServingInputReceiver)
self.assertIsInstance(receiver.features, tf.Tensor)
self.assertEqual(receiver.features.shape, tf.TensorShape([10, 4, 5]))
self.assertEqual(receiver.features.dtype, tf.int8)
self.assertIsInstance(receiver.receiver_tensors, dict)
# Note that Python 3 can no longer index .values() directly; cast to list.
self.assertEqual(list(receiver.receiver_tensors.values())[0].shape,
tf.TensorShape([10, 4, 5]))
if __name__ == "__main__":
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library to upload benchmark generated by BenchmarkLogger to remote repo.
This library require google cloud bigquery lib as dependency, which can be
installed with:
> pip install --upgrade google-cloud-bigquery
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import sys
import uuid
from google.cloud import bigquery
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.arg_parsers import parsers
from official.utils.logging import logger
class BigQueryUploader(object):
"""Upload the benchmark and metric info to BigQuery."""
def __init__(self, logging_dir, gcp_project=None, credentials=None):
"""Initialized BigQueryUploader with proper setting.
Args:
logging_dir: string, logging directory that contains the benchmark log.
gcp_project: string, the name of the GCP project that the log will be
uploaded to. The default project name will be detected from local
environment if no value is provided.
credentials: google.auth.credentials. The credential to access the
BigQuery service. The default service account credential will be
detected from the local environment if no value is provided. Please use
google.oauth2.service_account.Credentials to load credentials from a local
file when the test is run outside of GCP.
"""
self._logging_dir = logging_dir
self._bq_client = bigquery.Client(
project=gcp_project, credentials=credentials)
def upload_benchmark_run(self, dataset_name, table_name, run_id):
"""Upload benchmark run information to Bigquery.
Args:
dataset_name: string, the name of bigquery dataset where the data will be
uploaded.
table_name: string, the name of bigquery table under the dataset where
the data will be uploaded.
run_id: string, a unique ID that will be attached to the data, usually
in UUID4 format.
"""
expected_file = os.path.join(
self._logging_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)
with tf.gfile.GFile(expected_file) as f:
benchmark_json = json.load(f)
benchmark_json["model_id"] = run_id
table_ref = self._bq_client.dataset(dataset_name).table(table_name)
errors = self._bq_client.insert_rows_json(table_ref, [benchmark_json])
if errors:
tf.logging.error(
"Failed to upload benchmark info to bigquery: {}".format(errors))
def upload_metric(self, dataset_name, table_name, run_id):
"""Upload metric information to Bigquery.
Args:
dataset_name: string, the name of bigquery dataset where the data will be
uploaded.
table_name: string, the name of bigquery table under the dataset where
the metric data will be uploaded. This is different from the
benchmark_run table.
run_id: string, a unique ID that will be attached to the data, usually
in UUID4 format. This should be the same as the benchmark run_id.
"""
expected_file = os.path.join(
self._logging_dir, logger.METRIC_LOG_FILE_NAME)
with tf.gfile.GFile(expected_file) as f:
lines = f.readlines()
metrics = []
for line in filter(lambda l: l.strip(), lines):
metric = json.loads(line)
metric["run_id"] = run_id
metrics.append(metric)
table_ref = self._bq_client.dataset(dataset_name).table(table_name)
errors = self._bq_client.insert_rows_json(table_ref, metrics)
if errors:
tf.logging.error(
"Failed to upload benchmark info to bigquery: {}".format(errors))
def main(argv):
parser = parsers.BenchmarkParser()
flags = parser.parse_args(args=argv[1:])
if not flags.benchmark_log_dir:
print("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir")
sys.exit(1)
uploader = BigQueryUploader(
flags.benchmark_log_dir,
gcp_project=flags.gcp_project)
run_id = str(uuid.uuid4())
uploader.upload_benchmark_run(
flags.bigquery_data_set, flags.bigquery_run_table, run_id)
uploader.upload_metric(
flags.bigquery_data_set, flags.bigquery_metric_table, run_id)
if __name__ == "__main__":
main(argv=sys.argv)
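As the constructor docstring above notes, runs outside of GCP can pass explicit service-account credentials; a sketch, assuming the module is importable as official.utils.logging.benchmark_uploader (module path and key path are assumptions):

from google.oauth2 import service_account

from official.utils.logging import benchmark_uploader  # assumed module path

# Load an explicit service-account key instead of relying on the environment.
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service_account_key.json")
uploader = benchmark_uploader.BigQueryUploader(
    "/tmp/benchmark_log", gcp_project="my-gcp-project", credentials=credentials)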
......@@ -27,6 +27,7 @@ from __future__ import print_function
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.logging import hooks
from official.utils.logging import metric_hook
_TENSORS_TO_LOG = dict((x, x) for x in ['learning_rate',
'cross_entropy',
......@@ -122,9 +123,37 @@ def get_examples_per_second_hook(every_n_steps=100,
warm_steps=warm_steps)
def get_logging_metric_hook(benchmark_log_dir=None,
tensors_to_log=None,
every_n_secs=600,
**kwargs): # pylint: disable=unused-argument
"""Function to get LoggingMetricHook.
Args:
benchmark_log_dir: `string`, directory path to save the metric log.
tensors_to_log: List of tensor names or dictionary mapping labels to tensor
names. If not set, log _TENSORS_TO_LOG by default.
every_n_secs: `int`, the frequency for logging the metric. Defaults to every
10 minutes.
Returns:
A LoggingMetricHook that writes the specified tensor values to the benchmark
log under benchmark_log_dir.
"""
if benchmark_log_dir is None:
raise ValueError("metric_log_dir should be provided to use metric logger")
if tensors_to_log is None:
tensors_to_log = _TENSORS_TO_LOG
return metric_hook.LoggingMetricHook(
tensors=tensors_to_log,
log_dir=benchmark_log_dir,
every_n_secs=every_n_secs)
# A dictionary to map one hook name and its corresponding function
HOOKS = {
'loggingtensorhook': get_logging_tensor_hook,
'profilerhook': get_profiler_hook,
'examplespersecondhook': get_examples_per_second_hook,
'loggingmetrichook': get_logging_metric_hook,
}
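With the hook registered above, callers can request it by name together with the other hooks; a minimal sketch (directory path illustrative):

from official.utils.logging import hooks_helper

# benchmark_log_dir is forwarded to get_logging_metric_hook via **kwargs;
# the other hook factories simply ignore it.
train_hooks = hooks_helper.get_train_hooks(
    ['LoggingTensorHook', 'ExamplesPerSecondHook', 'LoggingMetricHook'],
    batch_size=128,
    benchmark_log_dir='/tmp/benchmark_log')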
......@@ -49,16 +49,19 @@ class BaseTest(unittest.TestCase):
expected_hook_name)
def test_get_train_hooks_logging_tensor_hook(self):
test_hook_name = 'LoggingTensorHook'
self.validate_train_hook_name(test_hook_name, 'loggingtensorhook')
self.validate_train_hook_name('LoggingTensorHook', 'loggingtensorhook')
def test_get_train_hooks_profiler_hook(self):
test_hook_name = 'ProfilerHook'
self.validate_train_hook_name(test_hook_name, 'profilerhook')
self.validate_train_hook_name('ProfilerHook', 'profilerhook')
def test_get_train_hooks_examples_per_second_hook(self):
test_hook_name = 'ExamplesPerSecondHook'
self.validate_train_hook_name(test_hook_name, 'examplespersecondhook')
self.validate_train_hook_name('ExamplesPerSecondHook',
'examplespersecondhook')
def test_get_logging_metric_hook(self):
test_hook_name = 'LoggingMetricHook'
self.validate_train_hook_name(test_hook_name, 'loggingmetrichook',
benchmark_log_dir='/tmp')
if __name__ == '__main__':
tf.test.main()
......@@ -13,19 +13,26 @@
# limitations under the License.
# ==============================================================================
"""Logging utilities for benchmark."""
"""Logging utilities for benchmark.
For collecting local environment metrics like CPU and memory, certain python
packages need to be installed. See README for details.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import json
import multiprocessing
import numbers
import os
import tensorflow as tf
from tensorflow.python.client import device_lib
_METRIC_LOG_FILE_NAME = "metric.log"
METRIC_LOG_FILE_NAME = "metric.log"
BENCHMARK_RUN_LOG_FILE_NAME = "benchmark_run.log"
_DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ"
......@@ -37,6 +44,25 @@ class BenchmarkLogger(object):
if not tf.gfile.IsDirectory(self._logging_dir):
tf.gfile.MakeDirs(self._logging_dir)
def log_estimator_evaluation_result(self, eval_results):
"""Log the evaluation result for a estimator.
The evaluate result is a directory that contains metrics defined in
model_fn. It also contains a entry for global_step which contains the value
of the global step when evaluation was performed.
Args:
eval_results: dict, the result of evaluate() from an estimator.
"""
if not isinstance(eval_results, dict):
tf.logging.warning("eval_results should be directory for logging. Got %s",
type(eval_results))
return
global_step = eval_results[tf.GraphKeys.GLOBAL_STEP]
for key in sorted(eval_results):
if key != tf.GraphKeys.GLOBAL_STEP:
self.log_metric(key, eval_results[key], global_step=global_step)
def log_metric(self, name, value, unit=None, global_step=None, extras=None):
"""Log the benchmark metric information to local file.
......@@ -55,9 +81,12 @@ class BenchmarkLogger(object):
tf.logging.warning(
"Metric value to log should be a number. Got %s", type(value))
return
if extras:
extras = [{"name": k, "value": v} for k, v in sorted(extras.items())]
else:
extras = []
with tf.gfile.GFile(
os.path.join(self._logging_dir, _METRIC_LOG_FILE_NAME), "a") as f:
os.path.join(self._logging_dir, METRIC_LOG_FILE_NAME), "a") as f:
metric = {
"name": name,
"value": float(value),
......@@ -72,3 +101,96 @@ class BenchmarkLogger(object):
except (TypeError, ValueError) as e:
tf.logging.warning("Failed to dump metric to log file: "
"name %s, value %s, error %s", name, value, e)
def log_run_info(self, model_name):
"""Collect most of the TF runtime information for the local env.
The schema of the run info follows official/benchmark/datastore/schema.
Args:
model_name: string, the name of the model.
"""
run_info = {
"model_name": model_name,
"machine_config": {},
"run_date": datetime.datetime.now().strftime(_DATE_TIME_FORMAT_PATTERN)}
_collect_tensorflow_info(run_info)
_collect_tensorflow_environment_variables(run_info)
_collect_cpu_info(run_info)
_collect_gpu_info(run_info)
_collect_memory_info(run_info)
with tf.gfile.GFile(os.path.join(
self._logging_dir, BENCHMARK_RUN_LOG_FILE_NAME), "w") as f:
try:
json.dump(run_info, f)
f.write("\n")
except (TypeError, ValueError) as e:
tf.logging.warning("Failed to dump benchmark run info to log file: %s",
e)
def _collect_tensorflow_info(run_info):
run_info["tensorflow_version"] = {
"version": tf.VERSION, "git_hash": tf.GIT_VERSION}
def _collect_tensorflow_environment_variables(run_info):
run_info["tensorflow_environment_variables"] = [
{"name": k, "value": v}
for k, v in sorted(os.environ.items()) if k.startswith("TF_")]
# The following code is mirrored from tensorflow/tools/test/system_info_lib
# which is not exposed for import.
def _collect_cpu_info(run_info):
"""Collect the CPU information for the local environment."""
cpu_info = {}
cpu_info["num_cores"] = multiprocessing.cpu_count()
# Note: cpuinfo is not installed in the TensorFlow OSS tree.
# It is installable via pip.
import cpuinfo # pylint: disable=g-import-not-at-top
info = cpuinfo.get_cpu_info()
cpu_info["cpu_info"] = info["brand"]
cpu_info["mhz_per_cpu"] = info["hz_advertised_raw"][0] / 1.0e6
run_info["machine_config"]["cpu_info"] = cpu_info
def _collect_gpu_info(run_info):
"""Collect local GPU information by TF device library."""
gpu_info = {}
local_device_protos = device_lib.list_local_devices()
gpu_info["count"] = len([d for d in local_device_protos
if d.device_type == "GPU"])
# The device description is a comma-separated string from which the GPU
# model can be parsed, e.g.:
# "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0"
for d in local_device_protos:
if d.device_type == "GPU":
gpu_info["model"] = _parse_gpu_model(d.physical_device_desc)
# Assume all connected GPUs are the same model
break
run_info["machine_config"]["gpu_info"] = gpu_info
def _collect_memory_info(run_info):
# Note: psutil is not installed in the TensorFlow OSS tree.
# It is installable via pip.
import psutil # pylint: disable=g-import-not-at-top
vmem = psutil.virtual_memory()
run_info["machine_config"]["memory_total"] = vmem.total
run_info["machine_config"]["memory_available"] = vmem.available
def _parse_gpu_model(physical_device_desc):
# Assume all connected GPUs are the same model.
for kv in physical_device_desc.split(","):
k, _, v = kv.partition(":")
if k.strip() == "name":
return v.strip()
return None
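Putting the logger together, a training run records its environment once and then appends metrics as they become available; a minimal sketch (paths and values illustrative; log_run_info assumes psutil and py-cpuinfo are installed, per requirements.txt):

from official.utils.logging import logger

benchmark_logger = logger.BenchmarkLogger('/tmp/benchmark_log')

# One-time snapshot: TF version, TF_* environment variables, CPU/GPU/memory.
benchmark_logger.log_run_info('resnet')

# Individual metrics are appended as JSON lines to metric.log.
benchmark_logger.log_metric('accuracy', 0.91, global_step=1000)

# Or log everything an Estimator's evaluate() returned in one call:
# eval_results = classifier.evaluate(input_fn=eval_input_fn)
# benchmark_logger.log_estimator_evaluation_result(eval_results)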