Commit 1a83863d authored by Asim Shankar

[mnist]: Updates

- Use the object-oriented tf.layers API instead of the functional one.
  The object-oriented API is particularly useful when using the model
  with eager execution.
- Update the unit test to train, evaluate, and predict using the model.
- Add a micro-benchmark for measuring step time.
  The parameters (batch_size, num_steps, etc.) have NOT been tuned;
  the purpose of this code is mostly to illustrate how model
  benchmarks may be written.

These changes are made as a step towards consolidating model definitions
for different TensorFlow features, like eager execution
(https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/mnist)
and support for TPUs
(https://github.com/tensorflow/tpu-demos/tree/master/cloud_tpu/models/mnist).
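
To make the first point concrete: functional calls like `tf.layers.conv2d(...)` create fresh variables on every invocation, which assumes a graph that is built once, while layer objects like `tf.layers.Conv2D` own their variables and can be called repeatedly like ordinary Python callables. A minimal sketch of the pattern, assuming TensorFlow 1.x with eager execution enabled via `tf.contrib.eager` (the toy `TwoLayerNet` class and shapes are illustrative, not part of this commit):

```python
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()


class TwoLayerNet(object):
  """Toy model in the same object-oriented style as the Model class below."""

  def __init__(self):
    # Layer objects create their variables once and reuse them on every call.
    self.dense1 = tf.layers.Dense(128, activation=tf.nn.relu)
    self.dense2 = tf.layers.Dense(10)

  def __call__(self, inputs):
    return self.dense2(self.dense1(inputs))


model = TwoLayerNet()
batch = tf.random_uniform([8, 784])
logits = model(batch)  # Runs immediately under eager execution.
print(logits.shape)    # (8, 10)
```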
parent 5a1faffd
@@ -21,6 +21,13 @@ python mnist.py
 The model will begin training and will automatically evaluate itself on the
 validation data.
 
+Illustrative unit tests and benchmarks can be run with:
+
+```
+python mnist_test.py
+python mnist_test.py --benchmarks=.
+```
+
 ## Exporting the model
 
 You can export the model into the TensorFlow [SavedModel](https://www.tensorflow.org/programmers_guide/saved_model) format by using the argument `--export_dir`:
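
For example (the export path below is only an illustration; any writable directory works):

```
python mnist.py --export_dir /tmp/mnist_saved_model
```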
@@ -63,6 +63,7 @@ parser.add_argument(
     type=str,
     help='The directory where the exported SavedModel will be stored.')
 
+
 def train_dataset(data_dir):
   """Returns a tf.data.Dataset yielding (image, label) pairs for training."""
   data = input_data.read_data_sets(data_dir, one_hot=True).train
@@ -75,141 +76,128 @@ def eval_dataset(data_dir):
   return tf.data.Dataset.from_tensors((data.images, data.labels))
 
 
-def mnist_model(inputs, mode, data_format):
-  """Takes the MNIST inputs and mode and outputs a tensor of logits."""
-  # Input Layer
-  # Reshape X to 4-D tensor: [batch_size, width, height, channels]
-  # MNIST images are 28x28 pixels, and have one color channel
-  inputs = tf.reshape(inputs, [-1, 28, 28, 1])
-
-  if data_format is None:
-    # When running on GPU, transpose the data from channels_last (NHWC) to
-    # channels_first (NCHW) to improve performance.
-    # See https://www.tensorflow.org/performance/performance_guide#data_formats
-    data_format = ('channels_first'
-                   if tf.test.is_built_with_cuda() else 'channels_last')
-
-  if data_format == 'channels_first':
-    inputs = tf.transpose(inputs, [0, 3, 1, 2])
-
-  # Convolutional Layer #1
-  # Computes 32 features using a 5x5 filter with ReLU activation.
-  # Padding is added to preserve width and height.
-  # Input Tensor Shape: [batch_size, 28, 28, 1]
-  # Output Tensor Shape: [batch_size, 28, 28, 32]
-  conv1 = tf.layers.conv2d(
-      inputs=inputs,
-      filters=32,
-      kernel_size=[5, 5],
-      padding='same',
-      activation=tf.nn.relu,
-      data_format=data_format)
-
-  # Pooling Layer #1
-  # First max pooling layer with a 2x2 filter and stride of 2
-  # Input Tensor Shape: [batch_size, 28, 28, 32]
-  # Output Tensor Shape: [batch_size, 14, 14, 32]
-  pool1 = tf.layers.max_pooling2d(
-      inputs=conv1, pool_size=[2, 2], strides=2, data_format=data_format)
-
-  # Convolutional Layer #2
-  # Computes 64 features using a 5x5 filter.
-  # Padding is added to preserve width and height.
-  # Input Tensor Shape: [batch_size, 14, 14, 32]
-  # Output Tensor Shape: [batch_size, 14, 14, 64]
-  conv2 = tf.layers.conv2d(
-      inputs=pool1,
-      filters=64,
-      kernel_size=[5, 5],
-      padding='same',
-      activation=tf.nn.relu,
-      data_format=data_format)
-
-  # Pooling Layer #2
-  # Second max pooling layer with a 2x2 filter and stride of 2
-  # Input Tensor Shape: [batch_size, 14, 14, 64]
-  # Output Tensor Shape: [batch_size, 7, 7, 64]
-  pool2 = tf.layers.max_pooling2d(
-      inputs=conv2, pool_size=[2, 2], strides=2, data_format=data_format)
-
-  # Flatten tensor into a batch of vectors
-  # Input Tensor Shape: [batch_size, 7, 7, 64]
-  # Output Tensor Shape: [batch_size, 7 * 7 * 64]
-  pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
-
-  # Dense Layer
-  # Densely connected layer with 1024 neurons
-  # Input Tensor Shape: [batch_size, 7 * 7 * 64]
-  # Output Tensor Shape: [batch_size, 1024]
-  dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
-
-  # Add dropout operation; 0.6 probability that element will be kept
-  dropout = tf.layers.dropout(
-      inputs=dense, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN))
-
-  # Logits layer
-  # Input Tensor Shape: [batch_size, 1024]
-  # Output Tensor Shape: [batch_size, 10]
-  logits = tf.layers.dense(inputs=dropout, units=10)
-  return logits
-
-
-def mnist_model_fn(features, labels, mode, params):
-  """Model function for MNIST."""
-  if mode == tf.estimator.ModeKeys.PREDICT and isinstance(features, dict):
-    features = features['image_raw']
-  logits = mnist_model(features, mode, params['data_format'])
-
-  predictions = {
-      'classes': tf.argmax(input=logits, axis=1),
-      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
-  }
-
-  if mode == tf.estimator.ModeKeys.PREDICT:
-    export_outputs = {'classify': tf.estimator.export.PredictOutput(predictions)}
-    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
-                                      export_outputs=export_outputs)
-
-  loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
-
-  # Configure the training op
-  if mode == tf.estimator.ModeKeys.TRAIN:
-    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
-    train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())
-  else:
-    train_op = None
-
-  accuracy = tf.metrics.accuracy(
-      tf.argmax(labels, axis=1), predictions['classes'])
-  metrics = {'accuracy': accuracy}
-
-  # Create a tensor named train_accuracy for logging purposes
-  tf.identity(accuracy[1], name='train_accuracy')
-  tf.summary.scalar('train_accuracy', accuracy[1])
-
-  return tf.estimator.EstimatorSpec(
-      mode=mode,
-      predictions=predictions,
-      loss=loss,
-      train_op=train_op,
-      eval_metric_ops=metrics)
+class Model(object):
+  """Class that defines a graph to recognize digits in the MNIST dataset."""
+
+  def __init__(self, data_format):
+    """Creates a model for classifying a hand-written digit.
+
+    Args:
+      data_format: Either 'channels_first' or 'channels_last'.
+        'channels_first' is typically faster on GPUs while 'channels_last' is
+        typically faster on CPUs. See
+        https://www.tensorflow.org/performance/performance_guide#data_formats
+    """
+    if data_format == 'channels_first':
+      self._input_shape = [-1, 1, 28, 28]
+    else:
+      assert data_format == 'channels_last'
+      self._input_shape = [-1, 28, 28, 1]
+
+    self.conv1 = tf.layers.Conv2D(
+        32, 5, padding='same', data_format=data_format, activation=tf.nn.relu)
+    self.conv2 = tf.layers.Conv2D(
+        64, 5, padding='same', data_format=data_format, activation=tf.nn.relu)
+    self.fc1 = tf.layers.Dense(1024, activation=tf.nn.relu)
+    self.fc2 = tf.layers.Dense(10)
+    self.dropout = tf.layers.Dropout(0.5)
+    self.max_pool2d = tf.layers.MaxPooling2D(
+        (2, 2), (2, 2), padding='same', data_format=data_format)
+
+  def __call__(self, inputs, training):
+    """Add operations to classify a batch of input images.
+
+    Args:
+      inputs: A Tensor representing a batch of input images.
+      training: A boolean. Set to True to add operations required only when
+        training the classifier.
+
+    Returns:
+      A logits Tensor with shape [<batch_size>, 10].
+    """
+    y = tf.reshape(inputs, self._input_shape)
+    y = self.conv1(y)
+    y = self.max_pool2d(y)
+    y = self.conv2(y)
+    y = self.max_pool2d(y)
+    y = tf.layers.flatten(y)
+    y = self.fc1(y)
+    y = self.dropout(y, training=training)
+    return self.fc2(y)
+
+
+def predict_spec(model, image):
+  """EstimatorSpec for predictions."""
+  if isinstance(image, dict):
+    image = image['image']
+  logits = model(image, training=False)
+  predictions = {
+      'classes': tf.argmax(logits, axis=1),
+      'probabilities': tf.nn.softmax(logits),
+  }
+  return tf.estimator.EstimatorSpec(
+      mode=tf.estimator.ModeKeys.PREDICT,
+      predictions=predictions,
+      export_outputs={
+          'classify': tf.estimator.export.PredictOutput(predictions)
+      })
+
+
+def train_spec(model, image, labels):
+  """EstimatorSpec for training."""
+  optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
+  logits = model(image, training=True)
+  loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
+  accuracy = tf.metrics.accuracy(
+      labels=tf.argmax(labels, axis=1), predictions=tf.argmax(logits, axis=1))
+  # Name the accuracy tensor 'train_accuracy' to demonstrate the
+  # LoggingTensorHook.
+  tf.identity(accuracy[1], name='train_accuracy')
+  tf.summary.scalar('train_accuracy', accuracy[1])
+  return tf.estimator.EstimatorSpec(
+      mode=tf.estimator.ModeKeys.TRAIN,
+      loss=loss,
+      train_op=optimizer.minimize(loss, tf.train.get_or_create_global_step()))
+
+
+def eval_spec(model, image, labels):
+  """EstimatorSpec for evaluation."""
+  logits = model(image, training=False)
+  loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
+  return tf.estimator.EstimatorSpec(
+      mode=tf.estimator.ModeKeys.EVAL,
+      loss=loss,
+      eval_metric_ops={
+          'accuracy':
+              tf.metrics.accuracy(
+                  labels=tf.argmax(labels, axis=1),
+                  predictions=tf.argmax(logits, axis=1)),
+      })
+
+
+def model_fn(features, labels, mode, params):
+  """The model_fn argument for creating an Estimator."""
+  model = Model(params['data_format'])
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    return predict_spec(model, features)
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    return train_spec(model, features, labels)
+  if mode == tf.estimator.ModeKeys.EVAL:
+    return eval_spec(model, features, labels)
 
 
 def main(unused_argv):
-  # Create the Estimator
+  data_format = FLAGS.data_format
+  if data_format is None:
+    data_format = ('channels_first'
+                   if tf.test.is_built_with_cuda() else 'channels_last')
   mnist_classifier = tf.estimator.Estimator(
-      model_fn=mnist_model_fn,
+      model_fn=model_fn,
       model_dir=FLAGS.model_dir,
       params={
-          'data_format': FLAGS.data_format
+          'data_format': data_format
       })
 
-  # Set up training hook that logs the training accuracy every 100 steps.
-  tensors_to_log = {'train_accuracy': 'train_accuracy'}
-  logging_hook = tf.train.LoggingTensorHook(
-      tensors=tensors_to_log, every_n_iter=100)
-
-  # Train the model
   def train_input_fn():
     # When choosing shuffle buffer sizes, larger sizes result in better
@@ -221,6 +209,10 @@ def main(unused_argv):
     (images, labels) = dataset.make_one_shot_iterator().get_next()
     return (images, labels)
 
+  # Set up training hook that logs the training accuracy every 100 steps.
+  tensors_to_log = {'train_accuracy': 'train_accuracy'}
+  logging_hook = tf.train.LoggingTensorHook(
+      tensors=tensors_to_log, every_n_iter=100)
   mnist_classifier.train(input_fn=train_input_fn, hooks=[logging_hook])
 
   # Evaluate the model and print results
@@ -233,10 +225,11 @@ def main(unused_argv):
   # Export the model
   if FLAGS.export_dir is not None:
-    image = tf.placeholder(tf.float32, [None, 28, 28])
-    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
-        {'image_raw': image})
-    mnist_classifier.export_savedmodel(FLAGS.export_dir, serving_input_fn)
+    image = tf.placeholder(tf.float32, [None, 28, 28])
+    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
+        'image': image,
+    })
+    mnist_classifier.export_savedmodel(FLAGS.export_dir, input_fn)
 
 
 if __name__ == '__main__':
@@ -18,52 +18,76 @@ from __future__ import division
 from __future__ import print_function
 
 import tensorflow as tf
+import time
 
 import mnist
 
-tf.logging.set_verbosity(tf.logging.ERROR)
-
-
-class BaseTest(tf.test.TestCase):
-
-  def input_fn(self):
-    features = tf.random_uniform([55000, 784])
-    labels = tf.random_uniform([55000], maxval=9, dtype=tf.int32)
-    return features, tf.one_hot(labels, 10)
-
-  def mnist_model_fn_helper(self, mode):
-    features, labels = self.input_fn()
-    image_count = features.shape[0]
-    spec = mnist.mnist_model_fn(
-        features, labels, mode, {'data_format': 'channels_last'})
-
-    predictions = spec.predictions
-    self.assertAllEqual(predictions['probabilities'].shape, (image_count, 10))
-    self.assertEqual(predictions['probabilities'].dtype, tf.float32)
-    self.assertAllEqual(predictions['classes'].shape, (image_count,))
-    self.assertEqual(predictions['classes'].dtype, tf.int64)
-
-    if mode != tf.estimator.ModeKeys.PREDICT:
-      loss = spec.loss
-      self.assertAllEqual(loss.shape, ())
-      self.assertEqual(loss.dtype, tf.float32)
-
-    if mode == tf.estimator.ModeKeys.EVAL:
-      eval_metric_ops = spec.eval_metric_ops
-      self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
-      self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
-      self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
-      self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
-
-  def test_mnist_model_fn_train_mode(self):
-    self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN)
-
-  def test_mnist_model_fn_eval_mode(self):
-    self.mnist_model_fn_helper(tf.estimator.ModeKeys.EVAL)
-
-  def test_mnist_model_fn_predict_mode(self):
-    self.mnist_model_fn_helper(tf.estimator.ModeKeys.PREDICT)
+BATCH_SIZE = 100
+
+
+def dummy_input_fn():
+  image = tf.random_uniform([BATCH_SIZE, 784])
+  labels = tf.random_uniform([BATCH_SIZE], maxval=9, dtype=tf.int32)
+  return image, tf.one_hot(labels, 10)
+
+
+def make_estimator():
+  data_format = 'channels_last'
+  if tf.test.is_built_with_cuda():
+    data_format = 'channels_first'
+  return tf.estimator.Estimator(
+      model_fn=mnist.model_fn, params={
+          'data_format': data_format
+      })
+
+
+class Tests(tf.test.TestCase):
+
+  def test_mnist(self):
+    classifier = make_estimator()
+
+    classifier.train(input_fn=dummy_input_fn, steps=2)
+
+    eval_results = classifier.evaluate(input_fn=dummy_input_fn, steps=1)
+    loss = eval_results['loss']
+    global_step = eval_results['global_step']
+    accuracy = eval_results['accuracy']
+    self.assertEqual(loss.shape, ())
+    self.assertEqual(2, global_step)
+    self.assertEqual(accuracy.shape, ())
+
+    input_fn = lambda: tf.random_uniform([3, 784])
+    predictions_generator = classifier.predict(input_fn)
+    for i in range(3):
+      predictions = next(predictions_generator)
+      self.assertEqual(predictions['probabilities'].shape, (10,))
+      self.assertEqual(predictions['classes'].shape, ())
+
+
+class Benchmarks(tf.test.Benchmark):
+
+  def benchmark_train_step_time(self):
+    classifier = make_estimator()
+    # Run one step to warmup any use of the GPU.
+    classifier.train(input_fn=dummy_input_fn, steps=1)
+
+    have_gpu = tf.test.is_gpu_available()
+    num_steps = 1000 if have_gpu else 100
+    name = 'train_step_time_%s' % ('gpu' if have_gpu else 'cpu')
+
+    start = time.time()
+    classifier.train(input_fn=dummy_input_fn, steps=num_steps)
+    end = time.time()
+
+    wall_time = (end - start) / num_steps
+    self.report_benchmark(
+        iters=num_steps,
+        wall_time=wall_time,
+        name=name,
+        extras={
+            'examples_per_sec': BATCH_SIZE / wall_time
+        })
 
 
 if __name__ == '__main__':
+  tf.logging.set_verbosity(tf.logging.ERROR)
   tf.test.main()