Commit 62b33958 authored by Neal Wu, committed by GitHub

Merge pull request #1538 from tfboyd/cifar_perf

Moving the input pipeline to CPU:0 increases images/sec from 1700 to 8000 on a GTX 1080.
parents 2c4fea8d b5acc005
cifar10_multi_gpu_train.py

@@ -62,17 +62,17 @@ tf.app.flags.DEFINE_boolean('log_device_placement', False,
                             """Whether to log device placement.""")
 
 
-def tower_loss(scope):
+def tower_loss(scope, images, labels):
   """Calculate the total loss on a single tower running the CIFAR model.
 
   Args:
     scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
+    images: Images. 4D tensor of shape [batch_size, height, width, 3].
+    labels: Labels. 1D tensor of shape [batch_size].
 
   Returns:
     Tensor of shape [] containing the total loss for a batch of data
   """
-  # Get images and labels for CIFAR-10.
-  images, labels = cifar10.distorted_inputs()
 
   # Build inference Graph.
   logits = cifar10.inference(images)
@@ -160,16 +160,22 @@ def train():
     # Create an optimizer that performs gradient descent.
     opt = tf.train.GradientDescentOptimizer(lr)
 
+    # Get images and labels for CIFAR-10.
+    images, labels = cifar10.distorted_inputs()
+    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
+        [images, labels], capacity=2 * FLAGS.num_gpus)
     # Calculate the gradients for each model tower.
     tower_grads = []
     with tf.variable_scope(tf.get_variable_scope()):
       for i in xrange(FLAGS.num_gpus):
         with tf.device('/gpu:%d' % i):
           with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
+            # Dequeues one batch for the GPU
+            images, labels = batch_queue.dequeue()
             # Calculate the loss for one tower of the CIFAR model. This function
             # constructs the entire CIFAR model but shares the variables across
             # all towers.
-            loss = tower_loss(scope)
+            loss = tower_loss(scope, images, labels)
 
             # Reuse variables for the next tower.
             tf.get_variable_scope().reuse_variables()
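Taken together, the two hunks above build the input pipeline once, stage batches through a small prefetch queue, and let each GPU tower dequeue a ready batch instead of constructing its own input ops. Below is a minimal sketch of that pattern, assuming the same TF 1.x contrib APIs used in the diff; build_inputs and NUM_GPUS are hypothetical stand-ins for cifar10.distorted_inputs() and FLAGS.num_gpus:

import tensorflow as tf

NUM_GPUS = 2  # hypothetical stand-in for FLAGS.num_gpus


def build_inputs():
  # Hypothetical stand-in for cifar10.distorted_inputs(); returns one
  # preprocessed batch of images and labels with static shapes.
  images = tf.random_uniform([128, 24, 24, 3])
  labels = tf.random_uniform([128], maxval=10, dtype=tf.int32)
  return images, labels


# Build the input pipeline once on the CPU and buffer a couple of
# batches per GPU so the towers never stall on preprocessing.
with tf.device('/cpu:0'):
  images, labels = build_inputs()
  batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
      [images, labels], capacity=2 * NUM_GPUS)

for i in range(NUM_GPUS):
  with tf.device('/gpu:%d' % i):
    with tf.name_scope('tower_%d' % i):
      # Each tower dequeues its own pre-staged batch instead of
      # rebuilding the input ops.
      tower_images, tower_labels = batch_queue.dequeue()
      # ... build the model and compute the tower loss here ...

A capacity of 2 * NUM_GPUS keeps roughly two batches staged per tower, enough to hide preprocessing latency without holding much extra memory.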
cifar10_train.py

@@ -62,7 +62,10 @@ def train():
     global_step = tf.contrib.framework.get_or_create_global_step()
 
     # Get images and labels for CIFAR-10.
-    images, labels = cifar10.distorted_inputs()
+    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
+    # GPU and resulting in a slow down.
+    with tf.device('/cpu:0'):
+      images, labels = cifar10.distorted_inputs()
 
     # Build a Graph that computes the logits predictions from the
     # inference model.
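The log_device_placement flag defined at the top of the first hunk gives a quick way to confirm the pinning took effect. A minimal sketch, assuming the same TF 1.x session API; here the option is passed directly through tf.ConfigProto rather than via the script's FLAGS:

import tensorflow as tf

with tf.device('/cpu:0'):
  images = tf.random_uniform([128, 24, 24, 3], name='input_images')

# log_device_placement prints every op's assigned device when the
# session is created, making it easy to confirm the input ops stayed
# on CPU:0; allow_soft_placement keeps the graph runnable on machines
# without a GPU.
config = tf.ConfigProto(log_device_placement=True,
                        allow_soft_placement=True)
with tf.Session(config=config) as sess:
  sess.run(images)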