Merge pull request tensorflow#1538 from tfboyd/cifar_perf

Input pipeline to CPU:0 increase images/sec from 1700 to 8000 on GTX 1080
envin3 · Jun 9, 2017 · 62b3395 · 62b3395
2 parents 2c4fea8 + b5acc00
commit 62b3395
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 5 deletions.
diff --git a/tutorials/image/cifar10/cifar10_multi_gpu_train.py b/tutorials/image/cifar10/cifar10_multi_gpu_train.py
@@ -62,17 +62,17 @@
                             """Whether to log device placement.""")
 
 
-def tower_loss(scope):
+def tower_loss(scope, images, labels):
   """Calculate the total loss on a single tower running the CIFAR model.
 
   Args:
     scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
+    images: Images. 4D tensor of shape [batch_size, height, width, 3].
+    labels: Labels. 1D tensor of shape [batch_size].
 
   Returns:
      Tensor of shape [] containing the total loss for a batch of data
   """
-  # Get images and labels for CIFAR-10.
-  images, labels = cifar10.distorted_inputs()
 
   # Build inference Graph.
   logits = cifar10.inference(images)
@@ -160,16 +160,22 @@ def train():
     # Create an optimizer that performs gradient descent.
     opt = tf.train.GradientDescentOptimizer(lr)
 
+    # Get images and labels for CIFAR-10.
+    images, labels = cifar10.distorted_inputs()
+    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
+          [images, labels], capacity=2 * FLAGS.num_gpus)
     # Calculate the gradients for each model tower.
     tower_grads = []
     with tf.variable_scope(tf.get_variable_scope()):
       for i in xrange(FLAGS.num_gpus):
         with tf.device('/gpu:%d' % i):
           with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
+            # Dequeues one batch for the GPU
+            images, labels = batch_queue.dequeue()
             # Calculate the loss for one tower of the CIFAR model. This function
             # constructs the entire CIFAR model but shares the variables across
             # all towers.
-            loss = tower_loss(scope)
+            loss = tower_loss(scope, images, labels)
 
             # Reuse variables for the next tower.
             tf.get_variable_scope().reuse_variables()

diff --git a/tutorials/image/cifar10/cifar10_train.py b/tutorials/image/cifar10/cifar10_train.py
@@ -62,7 +62,10 @@ def train():
     global_step = tf.contrib.framework.get_or_create_global_step()
 
     # Get images and labels for CIFAR-10.
-    images, labels = cifar10.distorted_inputs()
+    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
+    # GPU and resulting in a slow down.
+    with tf.device('/cpu:0'):
+      images, labels = cifar10.distorted_inputs()
 
     # Build a Graph that computes the logits predictions from the
     # inference model.