From 082e65c9342683e6dcb6df4e9922c99f0f966e90 Mon Sep 17 00:00:00 2001 From: Toby Boyd Date: Thu, 8 Jun 2017 10:08:13 -0700 Subject: [PATCH 1/5] Input pipeline on CPU. 1700 images/sec to 8000 on GTX 1080 --- tutorials/image/cifar10/cifar10_multi_gpu_train.py | 13 +++++++++---- tutorials/image/cifar10/cifar10_train.py | 5 ++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tutorials/image/cifar10/cifar10_multi_gpu_train.py b/tutorials/image/cifar10/cifar10_multi_gpu_train.py index 16033eeffd8..e97e1f8fae1 100644 --- a/tutorials/image/cifar10/cifar10_multi_gpu_train.py +++ b/tutorials/image/cifar10/cifar10_multi_gpu_train.py @@ -62,7 +62,7 @@ """Whether to log device placement.""") -def tower_loss(scope): +def tower_loss(scope, images, labels): """Calculate the total loss on a single tower running the CIFAR model. Args: @@ -71,8 +71,7 @@ def tower_loss(scope): Returns: Tensor of shape [] containing the total loss for a batch of data """ - # Get images and labels for CIFAR-10. - images, labels = cifar10.distorted_inputs() + # Build inference Graph. logits = cifar10.inference(images) @@ -160,6 +159,12 @@ def train(): # Create an optimizer that performs gradient descent. opt = tf.train.GradientDescentOptimizer(lr) + # Get images and labels for CIFAR-10. + # Force input pipeline to CPU:0 to avoid opertaios sometimes ending up on GPU + # and resulting in a slow down. + with tf.device('/CPU:0'): + images, labels = cifar10.distorted_inputs() + # Calculate the gradients for each model tower. tower_grads = [] with tf.variable_scope(tf.get_variable_scope()): @@ -169,7 +174,7 @@ def train(): # Calculate the loss for one tower of the CIFAR model. This function # constructs the entire CIFAR model but shares the variables across # all towers. - loss = tower_loss(scope) + loss = tower_loss(scope, images, labels) # Reuse variables for the next tower. 
tf.get_variable_scope().reuse_variables() diff --git a/tutorials/image/cifar10/cifar10_train.py b/tutorials/image/cifar10/cifar10_train.py index fec64ec2272..da01d500149 100644 --- a/tutorials/image/cifar10/cifar10_train.py +++ b/tutorials/image/cifar10/cifar10_train.py @@ -62,7 +62,10 @@ def train(): global_step = tf.contrib.framework.get_or_create_global_step() # Get images and labels for CIFAR-10. - images, labels = cifar10.distorted_inputs() + # Force input pipeline to CPU:0 to avoid opertaios sometimes ending up + # on GPU and resulting in a slow down. + with tf.device('/CPU:0'): + images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the # inference model. From 3909e4bdff25c952713a08b4ecc31fff1fdf2cb4 Mon Sep 17 00:00:00 2001 From: Toby Boyd Date: Thu, 8 Jun 2017 10:15:06 -0700 Subject: [PATCH 2/5] pydoc update to match method signature --- tutorials/image/cifar10/cifar10_multi_gpu_train.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tutorials/image/cifar10/cifar10_multi_gpu_train.py b/tutorials/image/cifar10/cifar10_multi_gpu_train.py index e97e1f8fae1..05d92cc27ec 100644 --- a/tutorials/image/cifar10/cifar10_multi_gpu_train.py +++ b/tutorials/image/cifar10/cifar10_multi_gpu_train.py @@ -67,6 +67,8 @@ def tower_loss(scope, images, labels): Args: scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0' + images: Images. 4D tensor of [batch_size, height, width, 3] size. + labels: Labels. 1D tensor of [batch_size] size. 
Returns: Tensor of shape [] containing the total loss for a batch of data From c3e2ae5ec1b0164ddd3895680c249d0adb1f11a8 Mon Sep 17 00:00:00 2001 From: Toby Boyd Date: Thu, 8 Jun 2017 13:49:27 -0700 Subject: [PATCH 3/5] Fixed typos and redundant with CPU:0 --- tutorials/image/cifar10/cifar10_multi_gpu_train.py | 5 +---- tutorials/image/cifar10/cifar10_train.py | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tutorials/image/cifar10/cifar10_multi_gpu_train.py b/tutorials/image/cifar10/cifar10_multi_gpu_train.py index 05d92cc27ec..9f269cc04ab 100644 --- a/tutorials/image/cifar10/cifar10_multi_gpu_train.py +++ b/tutorials/image/cifar10/cifar10_multi_gpu_train.py @@ -162,10 +162,7 @@ def train(): opt = tf.train.GradientDescentOptimizer(lr) # Get images and labels for CIFAR-10. - # Force input pipeline to CPU:0 to avoid opertaios sometimes ending up on GPU - # and resulting in a slow down. - with tf.device('/CPU:0'): - images, labels = cifar10.distorted_inputs() + images, labels = cifar10.distorted_inputs() # Calculate the gradients for each model tower. tower_grads = [] diff --git a/tutorials/image/cifar10/cifar10_train.py b/tutorials/image/cifar10/cifar10_train.py index da01d500149..e3243527926 100644 --- a/tutorials/image/cifar10/cifar10_train.py +++ b/tutorials/image/cifar10/cifar10_train.py @@ -62,8 +62,8 @@ def train(): global_step = tf.contrib.framework.get_or_create_global_step() # Get images and labels for CIFAR-10. - # Force input pipeline to CPU:0 to avoid opertaios sometimes ending up - # on GPU and resulting in a slow down. + # Force input pipeline to CPU:0 to avoid operations sometimes ending up on + # GPU and resulting in a slow down. 
with tf.device('/CPU:0'): images, labels = cifar10.distorted_inputs() From 9e8fd6d90c84df1f7444b055dcc3b653f6b7e14c Mon Sep 17 00:00:00 2001 From: Toby Boyd Date: Thu, 8 Jun 2017 15:05:06 -0700 Subject: [PATCH 4/5] Fixed typo and multi-gpu processing same batch on each gpu --- tutorials/image/cifar10/cifar10_multi_gpu_train.py | 6 +++++- tutorials/image/cifar10/cifar10_train.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tutorials/image/cifar10/cifar10_multi_gpu_train.py b/tutorials/image/cifar10/cifar10_multi_gpu_train.py index 9f269cc04ab..bc90711d7c9 100644 --- a/tutorials/image/cifar10/cifar10_multi_gpu_train.py +++ b/tutorials/image/cifar10/cifar10_multi_gpu_train.py @@ -138,6 +138,7 @@ def average_gradients(tower_grads): def train(): + print(FLAGS.batch_size) """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(), tf.device('/cpu:0'): # Create a variable to count the number of train() calls. This equals the @@ -163,13 +164,16 @@ def train(): # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() - + batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue( + [images, labels], capacity=2 * FLAGS.num_gpus) # Calculate the gradients for each model tower. tower_grads = [] with tf.variable_scope(tf.get_variable_scope()): for i in xrange(FLAGS.num_gpus): with tf.device('/gpu:%d' % i): with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope: + # Dequeues one batch for the GPU + images, labels = batch_queue.dequeue() # Calculate the loss for one tower of the CIFAR model. This function # constructs the entire CIFAR model but shares the variables across # all towers. diff --git a/tutorials/image/cifar10/cifar10_train.py b/tutorials/image/cifar10/cifar10_train.py index e3243527926..cc1dc0d1489 100644 --- a/tutorials/image/cifar10/cifar10_train.py +++ b/tutorials/image/cifar10/cifar10_train.py @@ -64,7 +64,7 @@ def train(): # Get images and labels for CIFAR-10. 
# Force input pipeline to CPU:0 to avoid operations sometimes ending up on # GPU and resulting in a slow down. - with tf.device('/CPU:0'): + with tf.device('/cpu:0'): images, labels = cifar10.distorted_inputs() # Build a Graph that computes the logits predictions from the From b5acc005968d37495f0e7d83d2dd2ef3d3674211 Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Thu, 8 Jun 2017 16:44:02 -0700 Subject: [PATCH 5/5] Code cleanup --- tutorials/image/cifar10/cifar10_multi_gpu_train.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tutorials/image/cifar10/cifar10_multi_gpu_train.py b/tutorials/image/cifar10/cifar10_multi_gpu_train.py index bc90711d7c9..fb15faca260 100644 --- a/tutorials/image/cifar10/cifar10_multi_gpu_train.py +++ b/tutorials/image/cifar10/cifar10_multi_gpu_train.py @@ -67,14 +67,13 @@ def tower_loss(scope, images, labels): Args: scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0' - images: Images. 4D tensor of [batch_size, height, width, 3] size. - labels: Labels. 1D tensor of [batch_size] size. + images: Images. 4D tensor of shape [batch_size, height, width, 3]. + labels: Labels. 1D tensor of shape [batch_size]. Returns: Tensor of shape [] containing the total loss for a batch of data """ - # Build inference Graph. logits = cifar10.inference(images) @@ -138,7 +137,6 @@ def average_gradients(tower_grads): def train(): - print(FLAGS.batch_size) """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(), tf.device('/cpu:0'): # Create a variable to count the number of train() calls. This equals the