Merge pull request #1063 from tensorflow/update-models-1.0

Converted the models repo to TF 1.0 using the upgrade script

Merge pull request #1063 from tensorflow/update-models-1.0
Converted the models repo to TF 1.0 using the upgrade script
4bd29ac0 · Neal Wu · GitHub · 836ea272 · b41ff7f1 · 4bd29ac0
Commit 4bd29ac0, authored Mar 13, 2017 by Neal Wu; committed by GitHub Mar 13, 2017
20 changed files
--- a/autoencoder/autoencoder_models/Autoencoder.py
+++ b/autoencoder/autoencoder_models/Autoencoder.py
@@ -18,7 +18,7 @@ class Autoencoder(object):
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
        # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()

--- a/autoencoder/autoencoder_models/DenoisingAutoencoder.py
+++ b/autoencoder/autoencoder_models/DenoisingAutoencoder.py
@@ -22,7 +22,7 @@ class AdditiveGaussianNoiseAutoencoder(object):
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
        # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()
@@ -89,7 +89,7 @@ class MaskingNoiseAutoencoder(object):
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
        # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()

--- a/autoencoder/autoencoder_models/VariationalAutoencoder.py
+++ b/autoencoder/autoencoder_models/VariationalAutoencoder.py
@@ -17,13 +17,13 @@ class VariationalAutoencoder(object):
        self.z_log_sigma_sq = tf.add(tf.matmul(self.x, self.weights['log_sigma_w1']), self.weights['log_sigma_b1'])
        # sample from gaussian distribution
-        eps = tf.random_normal(tf.pack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
+        eps = tf.random_normal(tf.stack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
-        self.z = tf.add(self.z_mean, tf.mul(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))
+        self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))
        self.reconstruction = tf.add(tf.matmul(self.z, self.weights['w2']), self.weights['b2'])
        # cost
-        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq
                                           - tf.square(self.z_mean)
                                           - tf.exp(self.z_log_sigma_sq), 1)

--- a/compression/decoder.py
+++ b/compression/decoder.py
--- a/compression/encoder.py
+++ b/compression/encoder.py
--- a/compression/msssim.py
+++ b/compression/msssim.py
--- a/differential_privacy/dp_sgd/dp_mnist/dp_mnist.py
+++ b/differential_privacy/dp_sgd/dp_mnist/dp_mnist.py
@@ -273,7 +273,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
        images, network_parameters)
    cost = tf.nn.softmax_cross_entropy_with_logits(
-        logits, tf.one_hot(labels, 10))
+        logits=logits, labels=tf.one_hot(labels, 10))
    # The actual cost is the average across the examples.
    cost = tf.reduce_sum(cost, [0]) / batch_size
@@ -343,7 +343,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
    # We need to maintain the initialization sequence.
    for v in tf.trainable_variables():
-      sess.run(tf.initialize_variables([v]))
+      sess.run(tf.variables_initializer([v]))
    sess.run(tf.global_variables_initializer())
    sess.run(init_ops)

--- a/differential_privacy/dp_sgd/dp_optimizer/utils.py
+++ b/differential_privacy/dp_sgd/dp_optimizer/utils.py
@@ -236,7 +236,7 @@ def BatchClipByL2norm(t, upper_bound, name=None):
  with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
    saved_shape = tf.shape(t)
    batch_size = tf.slice(saved_shape, [0], [1])
-    t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
+    t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))
    upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                              tf.constant(1.0/upper_bound))
    # Add a small number to avoid divide by 0
@@ -266,7 +266,7 @@ def SoftThreshold(t, threshold_ratio, name=None):
  assert threshold_ratio >= 0
  with tf.op_scope([t, threshold_ratio], name, "soft_thresholding") as name:
    saved_shape = tf.shape(t)
-    t2 = tf.reshape(t, tf.concat(0, [tf.slice(saved_shape, [0], [1]), -1]))
+    t2 = tf.reshape(t, tf.concat(axis=0, values=[tf.slice(saved_shape, [0], [1]), -1]))
    t_abs = tf.abs(t2)
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0],

--- a/differential_privacy/dp_sgd/per_example_gradients/per_example_gradients.py
+++ b/differential_privacy/dp_sgd/per_example_gradients/per_example_gradients.py
@@ -189,7 +189,7 @@ class MatMulPXG(object):
    z_grads, = z_grads
    x_expanded = tf.expand_dims(x, 2)
    z_grads_expanded = tf.expand_dims(z_grads, 1)
-    return tf.mul(x_expanded, z_grads_expanded)
+    return tf.multiply(x_expanded, z_grads_expanded)
 pxg_registry.Register("MatMul", MatMulPXG)
@@ -245,7 +245,7 @@ class Conv2DPXG(object):
      num_x = int(conv_x.get_shape()[0])
      assert num_x == 1, num_x
    assert len(conv_px) == batch_size
-    conv = tf.concat(0, conv_px)
+    conv = tf.concat(axis=0, values=conv_px)
    assert int(conv.get_shape()[0]) == batch_size
    return conv, w_px
@@ -274,7 +274,7 @@ class Conv2DPXG(object):
                                  self.colocate_gradients_with_ops,
                                  gate_gradients=self.gate_gradients)
-    return tf.pack(gradients_list)
+    return tf.stack(gradients_list)
 pxg_registry.Register("Conv2D", Conv2DPXG)

--- a/differential_privacy/multiple_teachers/deep_cnn.py
+++ b/differential_privacy/multiple_teachers/deep_cnn.py
@@ -75,7 +75,7 @@ def _variable_with_weight_decay(name, shape, stddev, wd):
  var = _variable_on_cpu(name, shape,
                         tf.truncated_normal_initializer(stddev=stddev))
  if wd is not None:
-    weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
+    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var
@@ -398,7 +398,7 @@ def train_op_fun(total_loss, global_step):
                                  decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)
-  tf.scalar_summary('learning_rate', lr)
+  tf.summary.scalar('learning_rate', lr)
  # Generate moving averages of all losses and associated summaries.
  loss_averages_op = moving_av(total_loss)
@@ -413,7 +413,7 @@ def train_op_fun(total_loss, global_step):
  # Add histograms for trainable variables.
  for var in tf.trainable_variables():
-    tf.histogram_summary(var.op.name, var)
+    tf.summary.histogram(var.op.name, var)
  # Track the moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
@@ -485,7 +485,7 @@ def train(images, labels, ckpt_path, dropout=False):
    train_op = train_op_fun(loss, global_step)
    # Create a saver.
-    saver = tf.train.Saver(tf.all_variables())
+    saver = tf.train.Saver(tf.global_variables())
    print("Graph constructed and saver created")

--- a/differential_privacy/privacy_accountant/tf/accountant.py
+++ b/differential_privacy/privacy_accountant/tf/accountant.py
@@ -361,12 +361,12 @@ class GaussianMomentsAccountant(MomentsAccountant):
    exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)
                             for j in range(t + 1)], dtype=tf.float64)
    # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}
-    x = tf.mul(binomial, signs)
+    x = tf.multiply(binomial, signs)
    # y[i, j] = x[i, j] * exp(exponents[j])
    #         = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
    # Note: this computation is done by broadcasting pointwise multiplication
    # between [t+1, t+1] tensor and [t+1] tensor.
-    y = tf.mul(x, tf.exp(exponents))
+    y = tf.multiply(x, tf.exp(exponents))
    # z[i] = sum_j y[i, j]
    #      = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
    z = tf.reduce_sum(y, 1)

--- a/im2txt/im2txt/show_and_tell_model.py
+++ b/im2txt/im2txt/show_and_tell_model.py
@@ -264,7 +264,7 @@ class ShowAndTellModel(object):
      if self.mode == "inference":
        # In inference mode, use concatenated states for convenient feeding and
        # fetching.
-        tf.concat(initial_state, 1, name="initial_state")
+        tf.concat(axis=1, values=initial_state, name="initial_state")
        # Placeholder for feeding a batch of concatenated states.
        state_feed = tf.placeholder(dtype=tf.float32,
@@ -274,11 +274,11 @@ class ShowAndTellModel(object):
        # Run a single LSTM step.
        lstm_outputs, state_tuple = lstm_cell(
-            inputs=tf.squeeze(self.seq_embeddings, squeeze_dims=[1]),
+            inputs=tf.squeeze(self.seq_embeddings, axis=[1]),
            state=state_tuple)
        # Concatenate the resulting state.
-        tf.concat(state_tuple, 1, name="state")
+        tf.concat(axis=1, values=state_tuple, name="state")
      else:
        # Run the batch of sequence embeddings through the LSTM.
        sequence_length = tf.reduce_sum(self.input_mask, 1)

--- a/inception/inception/data/preprocess_imagenet_validation_data.py
+++ b/inception/inception/data/preprocess_imagenet_validation_data.py
--- a/inception/inception/data/process_bounding_boxes.py
+++ b/inception/inception/data/process_bounding_boxes.py
--- a/inception/inception/image_processing.py
+++ b/inception/inception/image_processing.py
@@ -221,7 +221,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
    if not thread_id:
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox)
-      tf.image_summary('image_with_bounding_boxes', image_with_box)
+      tf.summary.image('image_with_bounding_boxes', image_with_box)
  # A large fraction of image datasets contain a human-annotated bounding
  # box delineating the region of the image containing the object of interest.
@@ -242,7 +242,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
    if not thread_id:
      image_with_distorted_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), distort_bbox)
-      tf.image_summary('images_with_distorted_bounding_box',
+      tf.summary.image('images_with_distorted_bounding_box',
                       image_with_distorted_box)
    # Crop the image to the specified bounding box.
@@ -259,7 +259,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
    # the third dimension.
    distorted_image.set_shape([height, width, 3])
    if not thread_id:
-      tf.image_summary('cropped_resized_image',
+      tf.summary.image('cropped_resized_image',
                       tf.expand_dims(distorted_image, 0))
    # Randomly flip the image horizontally.
@@ -269,7 +269,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
    distorted_image = distort_color(distorted_image, thread_id)
    if not thread_id:
-      tf.image_summary('final_distorted_image',
+      tf.summary.image('final_distorted_image',
                       tf.expand_dims(distorted_image, 0))
    return distorted_image
@@ -328,8 +328,8 @@ def image_preprocessing(image_buffer, bbox, train, thread_id=0):
    image = eval_image(image, height, width)
  # Finally, rescale to [-1,1] instead of [0, 1)
-  image = tf.sub(image, 0.5)
+  image = tf.subtract(image, 0.5)
-  image = tf.mul(image, 2.0)
+  image = tf.multiply(image, 2.0)
  return image
@@ -394,7 +394,7 @@ def parse_example_proto(example_serialized):
  ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
  # Note that we impose an ordering of (y, x) just to make life difficult.
-  bbox = tf.concat(0, [ymin, xmin, ymax, xmax])
+  bbox = tf.concat(axis=0, values=[ymin, xmin, ymax, xmax])
  # Force the variable number of bounding boxes into the shape
  # [1, num_boxes, coords].
@@ -505,6 +505,6 @@ def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None,
    images = tf.reshape(images, shape=[batch_size, height, width, depth])
    # Display the training images in the visualizer.
-    tf.image_summary('images', images)
+    tf.summary.image('images', images)
    return images, tf.reshape(label_index_batch, [batch_size])
--- a/inception/inception/inception_distributed_train.py
+++ b/inception/inception/inception_distributed_train.py
@@ -133,7 +133,7 @@ def train(target, dataset, cluster_spec):
                                      FLAGS.learning_rate_decay_factor,
                                      staircase=True)
      # Add a summary to track the learning rate.
-      tf.scalar_summary('learning_rate', lr)
+      tf.summary.scalar('learning_rate', lr)
      # Create an optimizer that performs gradient descent.
      opt = tf.train.RMSPropOptimizer(lr,
@@ -171,8 +171,8 @@ def train(target, dataset, cluster_spec):
          loss_name = l.op.name
          # Name each loss as '(raw)' and name the moving average version of the
          # loss as the original loss name.
-          tf.scalar_summary(loss_name + ' (raw)', l)
+          tf.summary.scalar(loss_name + ' (raw)', l)
-          tf.scalar_summary(loss_name, loss_averages.average(l))
+          tf.summary.scalar(loss_name, loss_averages.average(l))
        # Add dependency to compute loss_averages.
        with tf.control_dependencies([loss_averages_op]):
@@ -191,7 +191,7 @@ def train(target, dataset, cluster_spec):
      # Add histograms for model variables.
      for var in variables_to_average:
-        tf.histogram_summary(var.op.name, var)
+        tf.summary.histogram(var.op.name, var)
      # Create synchronous replica optimizer.
      opt = tf.train.SyncReplicasOptimizer(
@@ -215,7 +215,7 @@ def train(target, dataset, cluster_spec):
      # Add histograms for gradients.
      for grad, var in grads:
        if grad is not None:
-          tf.histogram_summary(var.op.name + '/gradients', grad)
+          tf.summary.histogram(var.op.name + '/gradients', grad)
      apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)
@@ -233,7 +233,7 @@ def train(target, dataset, cluster_spec):
      saver = tf.train.Saver()
      # Build the summary operation based on the TF collection of Summaries.
-      summary_op = tf.merge_all_summaries()
+      summary_op = tf.summary.merge_all()
      # Build an initialization operation to run below.
      init_op = tf.global_variables_initializer()

--- a/inception/inception/inception_eval.py
+++ b/inception/inception/inception_eval.py
@@ -158,10 +158,10 @@ def evaluate(dataset):
    saver = tf.train.Saver(variables_to_restore)
    # Build the summary operation based on the TF collection of Summaries.
-    summary_op = tf.merge_all_summaries()
+    summary_op = tf.summary.merge_all()
    graph_def = tf.get_default_graph().as_graph_def()
-    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
+    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)
    while True:

--- a/inception/inception/inception_model.py
+++ b/inception/inception/inception_model.py
@@ -115,7 +115,7 @@ def loss(logits, labels, batch_size=None):
  # shape [FLAGS.batch_size, num_classes].
  sparse_labels = tf.reshape(labels, [batch_size, 1])
  indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
-  concated = tf.concat(1, [indices, sparse_labels])
+  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  num_classes = logits[0].get_shape()[-1].value
  dense_labels = tf.sparse_to_dense(concated,
                                    [batch_size, num_classes],
@@ -147,8 +147,8 @@ def _activation_summary(x):
  # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
  # session. This helps the clarity of presentation on tensorboard.
  tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
-  tf.contrib.deprecated.histogram_summary(tensor_name + '/activations', x)
+  tf.summary.histogram(tensor_name + '/activations', x)
-  tf.contrib.deprecated.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
+  tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
 def _activation_summaries(endpoints):

--- a/inception/inception/inception_train.py
+++ b/inception/inception/inception_train.py
@@ -132,8 +132,8 @@ def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
    loss_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '', l.op.name)
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
-    tf.scalar_summary(loss_name +' (raw)', l)
+    tf.summary.scalar(loss_name +' (raw)', l)
-    tf.scalar_summary(loss_name, loss_averages.average(l))
+    tf.summary.scalar(loss_name, loss_averages.average(l))
  with tf.control_dependencies([loss_averages_op]):
    total_loss = tf.identity(total_loss)
@@ -166,7 +166,7 @@ def _average_gradients(tower_grads):
      grads.append(expanded_g)
    # Average over the 'tower' dimension.
-    grad = tf.concat(0, grads)
+    grad = tf.concat(axis=0, values=grads)
    grad = tf.reduce_mean(grad, 0)
    # Keep in mind that the Variables are redundant because they are shared
@@ -223,8 +223,8 @@ def train(dataset):
    num_classes = dataset.num_classes() + 1
     # Split the batch of images and labels for towers.
-    images_splits = tf.split(0, FLAGS.num_gpus, images)
+    images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images)
-    labels_splits = tf.split(0, FLAGS.num_gpus, labels)
+    labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels)
    # Calculate the gradients for each model tower.
    tower_grads = []
@@ -268,20 +268,20 @@ def train(dataset):
    summaries.extend(input_summaries)
    # Add a summary to track the learning rate.
-    summaries.append(tf.scalar_summary('learning_rate', lr))
+    summaries.append(tf.summary.scalar('learning_rate', lr))
    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(
-            tf.histogram_summary(var.op.name + '/gradients', grad))
+            tf.summary.histogram(var.op.name + '/gradients', grad))
    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
-      summaries.append(tf.histogram_summary(var.op.name, var))
+      summaries.append(tf.summary.histogram(var.op.name, var))
    # Track the moving averages of all trainable variables.
    # Note that we maintain a "double-average" of the BatchNormalization
@@ -301,10 +301,10 @@ def train(dataset):
                        batchnorm_updates_op)
    # Create a saver.
-    saver = tf.train.Saver(tf.all_variables())
+    saver = tf.train.Saver(tf.global_variables())
    # Build the summary operation from the last tower summaries.
-    summary_op = tf.merge_summary(summaries)
+    summary_op = tf.summary.merge(summaries)
    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()
@@ -329,7 +329,7 @@ def train(dataset):
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)
-    summary_writer = tf.train.SummaryWriter(
+    summary_writer = tf.summary.FileWriter(
        FLAGS.train_dir,
        graph_def=sess.graph.as_graph_def(add_shapes=True))

--- a/inception/inception/slim/inception_model.py
+++ b/inception/inception/slim/inception_model.py
@@ -122,7 +122,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
-          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x256a'] = net
        # mixed_1: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288a'):
@@ -138,7 +138,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
-          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x288a'] = net
        # mixed_2: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288b'):
@@ -154,7 +154,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
-          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x288b'] = net
        # mixed_3: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768a'):
@@ -167,7 +167,7 @@ def inception_v3(inputs,
                                      stride=2, padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
-          net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_17x17x768a'] = net
        # mixed4: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768b'):
@@ -186,7 +186,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768b'] = net
        # mixed_5: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768c'):
@@ -205,7 +205,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768c'] = net
        # mixed_6: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768d'):
@@ -224,7 +224,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768d'] = net
        # mixed_7: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768e'):
@@ -243,7 +243,7 @@ def inception_v3(inputs,
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768e'] = net
        # Auxiliary Head logits
        aux_logits = tf.identity(end_points['mixed_17x17x768e'])
@@ -276,7 +276,7 @@ def inception_v3(inputs,
                                     stride=2, padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
-          net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch3x3, branch7x7x3, branch_pool])
          end_points['mixed_17x17x1280a'] = net
        # mixed_9: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048a'):
@@ -284,17 +284,17 @@ def inception_v3(inputs,
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
-            branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]),
+            branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]),
-                                   ops.conv2d(branch3x3, 384, [3, 1])], 3)
+                                                  ops.conv2d(branch3x3, 384, [3, 1])])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
-            branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]),
+            branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]),
-                                      ops.conv2d(branch3x3dbl, 384, [3, 1])], 3)
+                                                     ops.conv2d(branch3x3dbl, 384, [3, 1])])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_8x8x2048a'] = net
        # mixed_10: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048b'):
@@ -302,17 +302,17 @@ def inception_v3(inputs,
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
-            branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]),
+            branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]),
-                                   ops.conv2d(branch3x3, 384, [3, 1])], 3)
+                                                  ops.conv2d(branch3x3, 384, [3, 1])])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
-            branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]),
+            branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]),
-                                      ops.conv2d(branch3x3dbl, 384, [3, 1])], 3)
+                                                     ops.conv2d(branch3x3dbl, 384, [3, 1])])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_8x8x2048b'] = net
        # Final pooling and prediction
        with tf.variable_scope('logits'):