Commit 4bd29ac0 authored by Neal Wu's avatar Neal Wu Committed by GitHub
Browse files

Merge pull request #1063 from tensorflow/update-models-1.0

Converted the models repo to TF 1.0 using the upgrade script
parents 836ea272 b41ff7f1
......@@ -331,9 +331,9 @@ def one_hot_encoding(labels, num_classes, scope=None):
batch_size = labels.get_shape()[0]
indices = tf.expand_dims(tf.range(0, batch_size), 1)
labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
concated = tf.concat([indices, labels], 1)
concated = tf.concat(axis=1, values=[indices, labels])
onehot_labels = tf.sparse_to_dense(
concated, tf.pack([batch_size, num_classes]), 1.0, 0.0)
concated, tf.stack([batch_size, num_classes]), 1.0, 0.0)
onehot_labels.set_shape([batch_size, num_classes])
return onehot_labels
......
......@@ -240,7 +240,7 @@ def global_step(device=''):
# Get the device for the variable.
with tf.device(variable_device(device, 'global_step')):
return tf.get_variable('global_step', shape=[], dtype=tf.int64,
initializer=tf.zeros_initializer,
initializer=tf.zeros_initializer(),
trainable=False, collections=collections)
......
......@@ -64,7 +64,7 @@ class NamignizerModel(object):
(cell_output, state) = cell(inputs[:, time_step, :], state)
outputs.append(cell_output)
output = tf.reshape(tf.concat(outputs, 1), [-1, size])
output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, size])
softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
softmax_b = tf.get_variable("softmax_b", [vocab_size])
logits = tf.matmul(output, softmax_w) + softmax_b
......
......@@ -251,9 +251,9 @@ def namignator(checkpoint_path, config):
if __name__ == "__main__":
# train("data/SmallNames.txt", "model/namignizer", SmallConfig)
train("data/SmallNames.txt", "model/namignizer", SmallConfig)
# namignize(["mary", "ida", "gazorbazorb", "mmmhmm", "bob"],
# tf.train.latest_checkpoint("model"), SmallConfig)
namignize(["mary", "ida", "gazorbazorb", "mmmhmm", "bob"],
tf.train.latest_checkpoint("model"), SmallConfig)
# namignator(tf.train.latest_checkpoint("model"), SmallConfig)
namignator(tf.train.latest_checkpoint("model"), SmallConfig)
......@@ -36,7 +36,7 @@ def conv_linear(args, kw, kh, nin, nout, rate, do_bias, bias_start, prefix):
if len(args) == 1:
arg = args[0]
else:
arg = tf.concat(args, 3)
arg = tf.concat(axis=3, values=args)
res = tf.nn.convolution(arg, k, dilation_rate=(rate, 1), padding="SAME")
if not do_bias: return res
with tf.device("/cpu:0"):
......@@ -71,14 +71,14 @@ def place_at14(decided, selected, it):
"""Place selected at it-th coordinate of decided, dim=1 of 4."""
slice1 = decided[:, :it, :, :]
slice2 = decided[:, it + 1:, :, :]
return tf.concat([slice1, selected, slice2], 1)
return tf.concat(axis=1, values=[slice1, selected, slice2])
def place_at13(decided, selected, it):
"""Place selected at it-th coordinate of decided, dim=1 of 3."""
slice1 = decided[:, :it, :]
slice2 = decided[:, it + 1:, :]
return tf.concat([slice1, selected, slice2], 1)
return tf.concat(axis=1, values=[slice1, selected, slice2])
def tanh_cutoff(x, cutoff):
......@@ -211,7 +211,7 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
# beam_val is [batch_size x beam_size]; let b = batch_size * beam_size
# decided is len x b x a x b
# output is b x out_size; step is b x len x a x b;
outputs = tf.split(tf.nn.log_softmax(output), beam_size, 0)
outputs = tf.split(axis=0, num_or_size_splits=beam_size, value=tf.nn.log_softmax(output))
all_beam_vals, all_beam_idx = [], []
beam_range = 1 if is_first else beam_size
for i in xrange(beam_range):
......@@ -221,9 +221,9 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
cur_beam_val], "GREPO", summarize=8)
all_beam_vals.append(top_out + tf.expand_dims(cur_beam_val, 1))
all_beam_idx.append(top_out_idx)
all_beam_idx = tf.reshape(tf.transpose(tf.concat(all_beam_idx, 1), [1, 0]),
all_beam_idx = tf.reshape(tf.transpose(tf.concat(axis=1, values=all_beam_idx), [1, 0]),
[-1])
top_beam, top_beam_idx = tf.nn.top_k(tf.concat(all_beam_vals, 1), k=beam_size)
top_beam, top_beam_idx = tf.nn.top_k(tf.concat(axis=1, values=all_beam_vals), k=beam_size)
top_beam_idx = tf.Print(top_beam_idx, [top_beam, top_beam_idx],
"GREP", summarize=8)
reordered = [[] for _ in xrange(len(tensors_to_reorder) + 1)]
......@@ -236,8 +236,8 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
reordered[0].append(tf.gather(output, which_beam))
for i, t in enumerate(tensors_to_reorder):
reordered[i + 1].append(tf.gather(t, which_beam))
new_tensors = [tf.concat(t, 0) for t in reordered]
top_out_idx = tf.concat(top_out_idx, 0)
new_tensors = [tf.concat(axis=0, values=t) for t in reordered]
top_out_idx = tf.concat(axis=0, values=top_out_idx)
return (top_beam, new_tensors[0], top_out_idx, new_tensors[1:])
......@@ -266,9 +266,9 @@ class NeuralGPU(object):
self.input = tf.placeholder(tf.int32, name="inp")
self.target = tf.placeholder(tf.int32, name="tgt")
self.prev_step = tf.placeholder(tf.float32, name="prev_step")
gpu_input = tf.split(self.input, num_gpus, 0)
gpu_target = tf.split(self.target, num_gpus, 0)
gpu_prev_step = tf.split(self.prev_step, num_gpus, 0)
gpu_input = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.input)
gpu_target = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.target)
gpu_prev_step = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.prev_step)
batch_size = tf.shape(gpu_input[0])[0]
if backward:
......@@ -410,7 +410,7 @@ class NeuralGPU(object):
out_write = output_ta.write(it, output_l[:batch_size, :, :, :])
output = tf.gather(target_emb_weights, out)
output = tf.reshape(output, [-1, 1, nmaps])
output = tf.concat([output] * height, 1)
output = tf.concat(axis=1, values=[output] * height)
tgt = tgts[it, :, :, :]
selected = tf.cond(tf.less(tf.random_uniform([]), self.sampling),
lambda: output, lambda: tgt)
......@@ -419,7 +419,7 @@ class NeuralGPU(object):
out_idx = place_at13(
out_idx, tf.reshape(out, [beam_size * batch_size, 1, 1]), it)
if mem_size > 0:
mem = tf.concat([mem] * height, 2)
mem = tf.concat(axis=2, values=[mem] * height)
dec_write = place_at14(dec_write, mem, it_incr)
return (step, dec_write, out_write, mloss + mem_loss, nupd_in + nupd,
out_idx, beam_cost)
......@@ -459,7 +459,7 @@ class NeuralGPU(object):
gpu_targets_tn)
embedded_targets_tn = tf.transpose(
embedded_targets_tn, [2, 0, 1, 3]) # len x b x 1 x nmaps
embedded_targets_tn = tf.concat([embedded_targets_tn] * height, 2)
embedded_targets_tn = tf.concat(axis=2, values=[embedded_targets_tn] * height)
# First image comes from start by applying convolution and adding 0s.
start = tf.transpose(start, [0, 2, 1, 3]) # Now b x len x h x vec_s
......@@ -505,7 +505,7 @@ class NeuralGPU(object):
attn_res = attention_query(attn_q, tf.get_variable(
"attn_v", [height * nmaps],
initializer=tf.random_uniform_initializer(-0.1, 0.1)))
concatenated = tf.reshape(tf.concat([cell_inp, attn_res], 1),
concatenated = tf.reshape(tf.concat(axis=1, values=[cell_inp, attn_res]),
[batch_size, 2 * height * nmaps])
cell_inp = tf.layers.dense(
concatenated, height * nmaps, name="attn_merge")
......@@ -519,14 +519,14 @@ class NeuralGPU(object):
res = tf.gather(target_emb_weights, res)
res *= tf.expand_dims(mask[:, 0], 1)
output = tf.layers.dense(
tf.concat([output, res], 1), height * nmaps, name="rnnmem")
tf.concat(axis=1, values=[output, res]), height * nmaps, name="rnnmem")
return new_state, output, mem_loss
# pylint: enable=cell-var-from-loop
gpu_targets = tf.squeeze(gpu_target[gpu], [1]) # b x len
gpu_tgt_trans = tf.transpose(gpu_targets, [1, 0])
dec_zero = tf.zeros([batch_size, 1], dtype=tf.int32)
dec_inp = tf.concat([dec_zero, gpu_targets], 1)
dec_inp = tf.concat(axis=1, values=[dec_zero, gpu_targets])
dec_inp = dec_inp[:, :length]
embedded_dec_inp = tf.gather(target_emb_weights, dec_inp)
embedded_dec_inp_proj = tf.layers.dense(
......@@ -573,9 +573,9 @@ class NeuralGPU(object):
height, vec_size])
# Prepare for beam search.
tgts = tf.concat([embedded_targets_tn] * beam_size, 1)
tgts = tf.concat(axis=1, values=[embedded_targets_tn] * beam_size)
beam_cost = tf.zeros([batch_size, beam_size])
step = tf.concat([step] * beam_size, 0)
step = tf.concat(axis=0, values=[step] * beam_size)
# First step hard-coded.
step, decided_t, output_ta, mem_loss, nupd, oi, bc = dec_step(
step, 0, 0, decided_t, output_ta, tgts, 0.0, 0, out_idx,
......@@ -654,7 +654,7 @@ class NeuralGPU(object):
% (gpu, time.time() - start_time))
self.updates = []
self.after_enc_step = tf.concat(self.after_enc_step, 0) # Concat GPUs.
self.after_enc_step = tf.concat(axis=0, values=self.after_enc_step) # Concat GPUs.
if backward:
tf.get_variable_scope()._reuse = False
tf.get_variable_scope().set_caching_device(None)
......@@ -667,10 +667,10 @@ class NeuralGPU(object):
self.losses = [gpu_avg([gpu_losses[g][i] for g in xrange(num_gpus)])
for i in xrange(len(gpu_losses[0]))]
self.out_idx = tf.concat(gpu_out_idx, 0)
self.out_idx = tf.concat(axis=0, values=gpu_out_idx)
self.grad_norms = [gpu_avg([gpu_grad_norms[g][i] for g in xrange(num_gpus)])
for i in xrange(len(gpu_grad_norms[0]))]
self.outputs = [tf.concat([gpu_outputs[g] for g in xrange(num_gpus)], 1)]
self.outputs = [tf.concat(axis=1, values=[gpu_outputs[g] for g in xrange(num_gpus)])]
self.quantize_op = quantize_weights_op(512, 8)
if backward:
self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
......
File mode changed from 100755 to 100644
......@@ -121,14 +121,14 @@ class Graph():
if (self.utility.FLAGS.rnn_dropout > 0.0):
question_hidden = question_hidden * rnn_dropout_mask
hidden_vectors.append(tf.expand_dims(question_hidden, 0))
hidden_vectors = tf.concat(0, hidden_vectors)
hidden_vectors = tf.concat(axis=0, values=hidden_vectors)
return question_hidden, hidden_vectors
def history_recurrent_step(self, curr_hprev, hprev):
#A single RNN step for controller or history RNN
return tf.tanh(
tf.matmul(
tf.concat(1, [hprev, curr_hprev]), self.params[
tf.concat(axis=1, values=[hprev, curr_hprev]), self.params[
"history_recurrent"])) + self.params["history_recurrent_bias"]
def question_number_softmax(self, hidden_vectors):
......@@ -150,13 +150,13 @@ class Graph():
tf.expand_dims(
tf.transpose(self.batch_ordinal_question_one), 2
), [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
question_number_softmax = tf.nn.softmax(tf.concat(1, [first, second]))
question_number_softmax = tf.nn.softmax(tf.concat(axis=1, values=[first, second]))
if (self.mode == "test"):
cond = tf.equal(question_number_softmax,
tf.reshape(
tf.reduce_max(question_number_softmax, 1),
[self.batch_size, 1]))
question_number_softmax = tf.select(
question_number_softmax = tf.where(
cond,
tf.fill(tf.shape(question_number_softmax), 1.0),
tf.fill(tf.shape(question_number_softmax), 0.0))
......@@ -164,7 +164,7 @@ class Graph():
self.data_type)
ans = tf.reshape(
tf.reduce_sum(question_number_softmax * tf.concat(
1, [self.batch_question_number, self.batch_question_number_one]),
axis=1, values=[self.batch_question_number, self.batch_question_number_one]),
1), [self.batch_size, 1])
return ans
......@@ -225,7 +225,7 @@ class Graph():
column_controller_vector = nn_utils.apply_dropout(
column_controller_vector, self.utility.FLAGS.dropout, self.mode)
self.full_column_hidden_vectors = tf.concat(
1, [self.column_hidden_vectors, self.word_column_hidden_vectors])
axis=1, values=[self.column_hidden_vectors, self.word_column_hidden_vectors])
self.full_column_hidden_vectors += self.summary_text_entry_embeddings
self.full_column_hidden_vectors = nn_utils.apply_dropout(
self.full_column_hidden_vectors, self.utility.FLAGS.dropout, self.mode)
......@@ -258,7 +258,7 @@ class Graph():
temp_ans.append(curr_prob)
else:
temp_ans.append(tf.zeros_like(curr_prob))
temp_ans = tf.transpose(tf.concat(0, temp_ans))
temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans))
answer += temp_ans
return answer
......@@ -266,7 +266,7 @@ class Graph():
#converts soft selection to hard selection. used at test time
cond = tf.equal(
softmax, tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1]))
softmax = tf.select(
softmax = tf.where(
cond, tf.fill(tf.shape(softmax), 1.0), tf.fill(tf.shape(softmax), 0.0))
softmax = tf.cast(softmax, self.data_type)
return softmax
......@@ -297,7 +297,7 @@ class Graph():
curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
curr_prob = curr_prob * tf.expand_dims(
tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
answer = tf.select(select_mask, curr_prob, answer)
answer = tf.where(select_mask, curr_prob, answer)
sum_prob += tf.reduce_sum(curr_prob, 2)
return answer
......@@ -335,11 +335,11 @@ class Graph():
1) #BS * max_elements
select_min = tf.reduce_sum(init_min * select_full_column_softmax,
1) #BS * max_elements
select_prev = tf.concat(1, [
select_prev = tf.concat(axis=1, values=[
tf.slice(select, [0, 1], [self.batch_size, self.max_elements - 1]),
tf.cast(tf.zeros([self.batch_size, 1]), self.data_type)
])
select_next = tf.concat(1, [
select_next = tf.concat(axis=1, values=[
tf.cast(tf.zeros([self.batch_size, 1]), self.data_type), tf.slice(
select, [0, 0], [self.batch_size, self.max_elements - 1])
])
......@@ -352,11 +352,11 @@ class Graph():
length_content = 1
length_select = 13
length_print = 1
values = tf.concat(1, [count])
values = tf.concat(axis=1, values=[count])
softmax_content = tf.slice(softmax, [0, 0],
[self.batch_size, length_content])
#compute scalar output
output = tf.reduce_sum(tf.mul(softmax_content, values), 1)
output = tf.reduce_sum(tf.multiply(softmax_content, values), 1)
#compute lookup answer
softmax_print = tf.slice(softmax, [0, length_content + length_select],
[self.batch_size, length_print])
......@@ -384,7 +384,7 @@ class Graph():
]
select = tf.reduce_sum(
tf.tile(tf.expand_dims(softmax_select, 2), [1, 1, self.max_elements]) *
tf.concat(1, select_lists), 1)
tf.concat(axis=1, values=select_lists), 1)
select = select * self.select_whole_mask
return output, select
......@@ -396,11 +396,11 @@ class Graph():
self.batch_question_attention_mask) #batch_size * embedding_dims
controller_vector = tf.nn.relu(
tf.matmul(hprev, self.params["controller_prev"]) + tf.matmul(
tf.concat(1, [question_embedding, attention_vector]), self.params[
tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
"controller"]))
column_controller_vector = tf.nn.relu(
tf.matmul(hprev, self.params["column_controller_prev"]) + tf.matmul(
tf.concat(1, [question_embedding, attention_vector]), self.params[
tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
"column_controller"]))
controller_vector = nn_utils.apply_dropout(
controller_vector, self.utility.FLAGS.dropout, self.mode)
......@@ -413,7 +413,7 @@ class Graph():
tf.matmul(tf.transpose(self.params_unit), tf.transpose(softmax)))
column_controller_vector = tf.nn.relu(
tf.matmul(
tf.concat(1, [
tf.concat(axis=1, values=[
column_controller_vector, weighted_op_representation
]), self.params["break_conditional"]))
full_column_softmax = self.compute_column_softmax(column_controller_vector,
......@@ -429,7 +429,7 @@ class Graph():
def compute_lookup_error(self, val):
#computes lookup error.
cond = tf.equal(self.batch_print_answer, val)
inter = tf.select(
inter = tf.where(
cond, self.init_print_error,
tf.tile(
tf.reshape(tf.constant(1e10, self.data_type), [1, 1, 1]), [
......@@ -450,12 +450,12 @@ class Graph():
def error_computation(self):
#computes the error of each example in a batch
math_error = 0.5 * tf.square(tf.sub(self.scalar_output, self.batch_answer))
math_error = 0.5 * tf.square(tf.subtract(self.scalar_output, self.batch_answer))
#scale math error
math_error = math_error / self.rows
math_error = tf.minimum(math_error, self.utility.FLAGS.max_math_error *
tf.ones(tf.shape(math_error), self.data_type))
self.init_print_error = tf.select(
self.init_print_error = tf.where(
self.batch_gold_select, -1 * tf.log(self.batch_lookup_answer + 1e-300 +
self.invert_select_full_mask), -1 *
tf.log(1 - self.batch_lookup_answer)) * self.select_full_mask
......@@ -466,24 +466,24 @@ class Graph():
print_error += self.compute_lookup_error(val + 0.0)
print_error = print_error * self.utility.FLAGS.print_cost / self.num_entries
if (self.mode == "train"):
error = tf.select(
error = tf.where(
tf.logical_and(
tf.not_equal(self.batch_answer, 0.0),
tf.not_equal(
tf.reduce_sum(tf.reduce_sum(self.batch_print_answer, 1), 1),
0.0)),
self.soft_min(math_error, print_error),
tf.select(
tf.where(
tf.not_equal(self.batch_answer, 0.0), math_error, print_error))
else:
error = tf.select(
error = tf.where(
tf.logical_and(
tf.equal(self.scalar_output, 0.0),
tf.equal(
tf.reduce_sum(tf.reduce_sum(self.batch_lookup_answer, 1), 1),
0.0)),
tf.ones_like(math_error),
tf.select(
tf.where(
tf.equal(self.scalar_output, 0.0), print_error, math_error))
return error
......@@ -558,7 +558,7 @@ class Graph():
input_col = tf.reduce_sum(
tf.expand_dims(soft_column_softmax, 2) *
self.full_column_hidden_vectors, 1)
history_input = tf.concat(1, [input_op, input_col])
history_input = tf.concat(axis=1, values=[input_op, input_col])
history_input = nn_utils.apply_dropout(
history_input, self.utility.FLAGS.dropout, self.mode)
hprev = self.history_recurrent_step(history_input, hprev)
......@@ -567,7 +567,7 @@ class Graph():
self.scalar_output = output
error = self.error_computation()
cond = tf.less(error, 0.0001, name="cond")
correct_add = tf.select(
correct_add = tf.where(
cond, tf.fill(tf.shape(cond), 1.0), tf.fill(tf.shape(cond), 0.0))
correct = tf.reduce_sum(correct_add)
error = error / batch_size
......@@ -579,11 +579,11 @@ class Graph():
#Sets mask variables and performs batch processing
self.batch_gold_select = self.batch_print_answer > 0.0
self.full_column_mask = tf.concat(
1, [self.batch_number_column_mask, self.batch_word_column_mask])
axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask])
self.full_processed_column = tf.concat(
1,
[self.batch_processed_number_column, self.batch_processed_word_column])
self.full_processed_sorted_index_column = tf.concat(1, [
axis=1,
values=[self.batch_processed_number_column, self.batch_processed_word_column])
self.full_processed_sorted_index_column = tf.concat(axis=1, values=[
self.batch_processed_sorted_index_number_column,
self.batch_processed_sorted_index_word_column
])
......@@ -603,7 +603,7 @@ class Graph():
tf.equal(self.batch_word_column_entry_mask,
self.utility.dummy_token_id)), self.data_type)
self.select_full_mask = tf.concat(
1, [self.select_mask, self.select_word_mask])
axis=1, values=[self.select_mask, self.select_word_mask])
self.select_whole_mask = tf.maximum(
tf.reshape(
tf.slice(self.select_mask, [0, 0, 0],
......@@ -614,7 +614,7 @@ class Graph():
[self.batch_size, 1, self.max_elements]),
[self.batch_size, self.max_elements]))
self.invert_select_full_mask = tf.cast(
tf.concat(1, [
tf.concat(axis=1, values=[
tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int),
tf.equal(self.batch_word_column_entry_mask,
self.utility.dummy_token_id)
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -65,7 +65,7 @@ class CrossConvModel(object):
diff = diff * 2.0 - self.params['scale']
diff_output = self.diff_output * 2.0 - self.params['scale']
concat_image = tf.concat(
1, [image, image + diff_output, image + diff, diff_output])
axis=1, values=[image, image + diff_output, image + diff, diff_output])
tf.summary.image('origin_predict_expect_predictdiff', concat_image)
self.summary_op = tf.summary.merge_all()
return self.loss
......@@ -113,7 +113,7 @@ class CrossConvModel(object):
assert shape[1] == shape[2] and shape[1] == 128
batch_size = shape[0]
net = tf.concat(3, [image, diff])
net = tf.concat(axis=3, values=[image, diff])
with tf.variable_scope('motion_encoder'):
with slim.arg_scope([slim.conv2d], padding='VALID'):
net = slim.conv2d(net, 96, [5, 5], stride=1)
......@@ -128,7 +128,7 @@ class CrossConvModel(object):
z = tf.reshape(net, shape=[batch_size, -1])
self.z_mean, self.z_stddev_log = tf.split(
split_dim=1, num_split=2, value=z)
axis=1, num_or_size_splits=2, value=z)
self.z_stddev = tf.exp(self.z_stddev_log)
epsilon = tf.random_normal(
......@@ -174,7 +174,7 @@ class CrossConvModel(object):
def _CrossConv(self, encoded_images):
"""Apply the motion kernel on the encoded_images."""
cross_conved_images = []
kernels = tf.split(split_dim=3, num_split=4, value=self.kernel)
kernels = tf.split(axis=3, num_or_size_splits=4, value=self.kernel)
for (i, encoded_image) in enumerate(encoded_images):
with tf.variable_scope('cross_conv_%d' % i):
kernel = kernels[i]
......@@ -187,7 +187,7 @@ class CrossConvModel(object):
for j in xrange(len(encoded_image)):
conved_image.append(self._CrossConvHelper(
encoded_image[j], kernel[j]))
cross_conved_images.append(tf.concat(0, conved_image))
cross_conved_images.append(tf.concat(axis=0, values=conved_image))
sys.stderr.write('cross_conved shape: %s\n' %
cross_conved_images[-1].get_shape())
return cross_conved_images
......@@ -224,7 +224,7 @@ class CrossConvModel(object):
nets.append(self._Deconv(
cross_conved_image, 64, kernel_size=3, stride=stride))
net = tf.concat(3, nets)
net = tf.concat(axis=3, values=nets)
net = slim.conv2d(net, 128, [9, 9], padding='SAME', stride=1)
net = slim.conv2d(net, 128, [1, 1], padding='SAME', stride=1)
net = slim.conv2d(net, 3, [1, 1], padding='SAME', stride=1)
......
......@@ -42,7 +42,7 @@ def SequenceToImageAndDiff(images):
for i in xrange(0, len(resized_images)-1):
diffs.append(resized_images[i+1] - resized_images[i])
image_diff_list.append(
(tf.concat(0, resized_images[:-1]), tf.concat(0, diffs)))
(tf.concat(axis=0, values=resized_images[:-1]), tf.concat(axis=0, values=diffs)))
return image_diff_list
......
......@@ -332,7 +332,7 @@ def masked_conv_aff_coupling(input_, mask_in, dim, name,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
mask = tf.mod(mask_channel + mask, 2)
res = tf.split(res, 2, 3)
res = tf.split(axis=3, num_or_size_splits=2, value=res)
shift, log_rescaling = res[-2], res[-1]
scale = variable_on_cpu(
"rescaling_scale", [],
......@@ -486,9 +486,9 @@ def conv_ch_aff_coupling(input_, dim, name,
scope.reuse_variables()
if change_bottom:
input_, canvas = tf.split(input_, 2, 3)
input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
else:
canvas, input_ = tf.split(input_, 2, 3)
canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
......@@ -509,7 +509,7 @@ def conv_ch_aff_coupling(input_, dim, name,
train=train, weight_norm=weight_norm,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
shift, log_rescaling = tf.split(res, 2, 3)
shift, log_rescaling = tf.split(axis=3, num_or_size_splits=2, value=res)
scale = variable_on_cpu(
"scale", [],
tf.constant_initializer(1.))
......@@ -570,9 +570,9 @@ def conv_ch_add_coupling(input_, dim, name,
scope.reuse_variables()
if change_bottom:
input_, canvas = tf.split(input_, 2, 3)
input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
else:
canvas, input_ = tf.split(input_, 2, 3)
canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
shape = input_.get_shape().as_list()
channels = shape[3]
res = input_
......@@ -736,8 +736,8 @@ def rec_masked_conv_coupling(input_, hps, scale_idx, n_scale,
log_diff_1 = log_diff[:, :, :, :channels]
log_diff_2 = log_diff[:, :, :, channels:]
else:
res_1, res_2 = tf.split(res, 2, 3)
log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
res_1, inc_log_diff = rec_masked_conv_coupling(
input_=res_1, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
......@@ -798,8 +798,8 @@ def rec_masked_deconv_coupling(input_, hps, scale_idx, n_scale,
log_diff_1 = log_diff[:, :, :, :channels]
log_diff_2 = log_diff[:, :, :, channels:]
else:
res_1, res_2 = tf.split(res, 2, 3)
log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
res_1, log_diff_1 = rec_masked_deconv_coupling(
input_=res_1, hps=hps,
scale_idx=scale_idx + 1, n_scale=n_scale,
......@@ -1305,7 +1305,7 @@ class RealNVP(object):
z_lost = z_complete
for scale_idx in xrange(hps.n_scale - 1):
z_lost = squeeze_2x2_ordered(z_lost)
z_lost, _ = tf.split(z_lost, 2, 3)
z_lost, _ = tf.split(axis=3, num_or_size_splits=2, value=z_lost)
z_compressed = z_lost
z_noisy = z_lost
for _ in xrange(scale_idx + 1):
......
......@@ -99,8 +99,8 @@ def conv_layer(input_,
filter_size[1] - input_.get_shape().as_list()[2],
input_.get_shape().as_list()[3]
])
res = tf.concat(1, [pad_1, res])
res = tf.concat(2, [pad_2, res])
res = tf.concat(axis=1, values=[pad_1, res])
res = tf.concat(axis=2, values=[pad_2, res])
res = tf.nn.conv2d(
input=res,
filter=weights,
......@@ -139,8 +139,8 @@ def depool_2x2(input_, stride=2):
channels = shape[3]
res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels])
res = tf.concat(
2, [res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
res = tf.concat(4, [
axis=2, values=[res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
res = tf.concat(axis=4, values=[
res, tf.zeros([batch_size, height, stride, width, stride - 1, channels])
])
res = tf.reshape(res, [batch_size, stride * height, stride * width, channels])
......@@ -158,11 +158,11 @@ def batch_random_flip(input_):
height = shape[1]
width = shape[2]
channels = shape[3]
res = tf.split(0, batch_size, input_)
res = tf.split(axis=0, num_or_size_splits=batch_size, value=input_)
res = [elem[0, :, :, :] for elem in res]
res = [tf.image.random_flip_left_right(elem) for elem in res]
res = [tf.reshape(elem, [1, height, width, channels]) for elem in res]
res = tf.concat(0, res)
res = tf.concat(axis=0, values=res)
return res
......@@ -175,7 +175,7 @@ def as_one_hot(input_, n_indices):
n_elem = numpy.prod(shape)
indices = tf.range(n_elem)
indices = tf.cast(indices, tf.int64)
indices_input = tf.concat(0, [indices, tf.reshape(input_, [-1])])
indices_input = tf.concat(axis=0, values=[indices, tf.reshape(input_, [-1])])
indices_input = tf.reshape(indices_input, [2, -1])
indices_input = tf.transpose(indices_input)
res = tf.sparse_to_dense(
......
......@@ -92,7 +92,7 @@ def rnn_helper(inp,
elif direction == "backward":
out = backward
else:
out = tf.concat(2, [forward, backward])
out = tf.concat(axis=2, values=[forward, backward])
return out
......@@ -183,7 +183,7 @@ def lstm_layer(inp,
with tf.variable_scope(name):
if backward:
if length is None:
inp = tf.reverse(inp, [False, True, False])
inp = tf.reverse(inp, [1])
else:
inp = tf.reverse_sequence(inp, length, 1, 0)
......@@ -217,14 +217,14 @@ def lstm_layer(inp,
batch_size = shapes.tensor_dim(inp, dim=0)
num_frames = shapes.tensor_dim(inp, dim=1)
prev = tf.reshape(inp, tf.pack([batch_size * num_frames, num_prev]))
prev = tf.reshape(inp, tf.stack([batch_size * num_frames, num_prev]))
if use_native_weights:
with tf.variable_scope("LSTMCell"):
b = tf.get_variable(
"B",
shape=[4 * num_nodes],
initializer=tf.zeros_initializer,
initializer=tf.zeros_initializer(),
dtype=tf.float32)
biases = tf.identity(b, name="biases")
else:
......@@ -236,17 +236,17 @@ def lstm_layer(inp,
biases, name="biases_reg"))
prev = tf.nn.xw_plus_b(prev, w_i_m, biases)
prev = tf.reshape(prev, tf.pack([batch_size, num_frames, 4, num_nodes]))
prev = tf.reshape(prev, tf.stack([batch_size, num_frames, 4, num_nodes]))
if state is None:
state = tf.fill(tf.pack([batch_size, num_nodes]), 0.0)
state = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
if memory is None:
memory = tf.fill(tf.pack([batch_size, num_nodes]), 0.0)
memory = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
out, _, mem = rnn.variable_lstm(prev, state, memory, w_m_m, clip=clip)
if backward:
if length is None:
out = tf.reverse(out, [False, True, False])
out = tf.reverse(out, [1])
else:
out = tf.reverse_sequence(out, length, 1, 0)
......
......@@ -79,7 +79,7 @@ def ImageInput(input_pattern, num_threads, shape, using_ctc, reader=None):
# Give the images a nice name as well.
images = tf.identity(images, name='Images')
tf.image_summary('Images', images)
tf.summary.image('Images', images)
return images, heights, widths, labels, sparse_labels, truths
......@@ -145,6 +145,6 @@ def _ImageProcessing(image_buffer, shape):
image = tf.image.decode_png(image_buffer, channels=shape.depth)
image.set_shape([shape.height, shape.width, shape.depth])
image = tf.cast(image, tf.float32)
image = tf.sub(image, 128.0)
image = tf.mul(image, 1 / 100.0)
image = tf.subtract(image, 128.0)
image = tf.multiply(image, 1 / 100.0)
return image
......@@ -147,7 +147,7 @@ def Eval(train_dir,
sequence_error=None)
with tf.Graph().as_default():
model = InitNetwork(eval_data, model_str, 'eval', reader=reader)
sw = tf.train.SummaryWriter(eval_dir)
sw = tf.summary.FileWriter(eval_dir)
while True:
sess = tf.Session('')
......@@ -369,7 +369,7 @@ class VGSLImageModel(object):
if self.mode == 'train':
# Setup loss for training.
self.loss = self._AddLossFunction(logits, height_in, out_dims, out_func)
tf.scalar_summary('loss', self.loss, name='loss')
tf.summary.scalar('loss', self.loss)
elif out_dims == 0:
# Be sure the labels match the output, even in eval mode.
self.labels = tf.slice(self.labels, [0, 0], [-1, 1])
......@@ -484,7 +484,7 @@ class VGSLImageModel(object):
opt = tf.train.AdamOptimizer(learning_rate=learn_rate_dec)
else:
raise ValueError('Invalid optimizer type: ' + optimizer_type)
tf.scalar_summary('learn_rate', learn_rate_dec, name='lr_summ')
tf.summary.scalar('learn_rate', learn_rate_dec)
self.train_op = opt.minimize(
self.loss, global_step=self.global_step, name='train')
......
......@@ -149,7 +149,7 @@ class VGSLSpecs(object):
else:
lengths = tf.ones_like(lengths)
if factor != 1:
lengths = tf.mul(lengths, tf.cast(factor, tf.float32))
lengths = tf.multiply(lengths, tf.cast(factor, tf.float32))
return tf.cast(lengths, tf.int32)
def BuildFromString(self, prev_layer, index):
......@@ -235,7 +235,7 @@ class VGSLSpecs(object):
final_factors = self.reduction_factors
if index == len(self.model_str):
raise ValueError('Missing ) at end of parallel!' + self.model_str)
return tf.concat(num_dims - 1, layers), index + 1
return tf.concat(axis=num_dims - 1, values=layers), index + 1
def AddConvLayer(self, prev_layer, index):
"""Add a single standard convolutional layer.
......@@ -342,7 +342,7 @@ class VGSLSpecs(object):
factor1 = tf.cast(self.reduction_factors[i], tf.float32)
factor2 = tf.cast(prev_shape[i], tf.float32)
divisor = tf.cast(result_shape[i], tf.float32)
self.reduction_factors[i] = tf.div(tf.mul(factor1, factor2), divisor)
self.reduction_factors[i] = tf.div(tf.multiply(factor1, factor2), divisor)
return layer, m.end()
def AddFCLayer(self, prev_layer, index):
......@@ -401,7 +401,7 @@ class VGSLSpecs(object):
name + '_forward')
back = self._LSTMLayer(prev_layer, 'backward', dim, True, depth,
name + '_reverse')
return tf.concat(3, [fwd, back], name=name + '_concat'), m.end()
return tf.concat(axis=3, values=[fwd, back], name=name + '_concat'), m.end()
if direction == 'f':
direction = 'forward'
elif direction == 'r':
......
File mode changed from 100755 to 100644
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment