Commit 052e5e8b authored by Neal Wu

Converted the models repo to TF 1.0 using the upgrade script

parent f21c4278
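The hunks below all follow the same mechanical pattern: calls whose argument order changed in TF 1.0 are rewritten by the upgrade script into explicit keyword form, and renamed ops are swapped for their new names (tf.pack -> tf.stack, tf.select -> tf.where, tf.mul -> tf.multiply, tf.sub -> tf.subtract, tf.scalar_summary -> tf.summary.scalar, and so on). A minimal sketch of the recurring conversions, assuming TensorFlow 1.x and illustrative tensors that are not taken from the diff:

import tensorflow as tf

indices = tf.constant([[0], [1]])
labels = tf.constant([[2], [3]])
cond = tf.constant([True, False])

# TF 0.x: tf.concat(concat_dim, values) -> TF 1.0 keyword form
concated = tf.concat(axis=1, values=[indices, labels])
# TF 0.x: tf.split(split_dim, num_split, value) -> TF 1.0 keyword form
left, right = tf.split(axis=1, num_or_size_splits=2, value=concated)
# TF 0.x: tf.select(cond, x, y) -> TF 1.0: tf.where(cond, x, y)
picked = tf.where(cond, tf.ones([2]), tf.zeros([2]))
# TF 0.x: tf.pack(values) -> TF 1.0: tf.stack(values)
shape = tf.stack([tf.shape(concated)[0], 4])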
......@@ -331,9 +331,9 @@ def one_hot_encoding(labels, num_classes, scope=None):
batch_size = labels.get_shape()[0]
indices = tf.expand_dims(tf.range(0, batch_size), 1)
labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
concated = tf.concat(1, [indices, labels])
concated = tf.concat(axis=1, values=[indices, labels])
onehot_labels = tf.sparse_to_dense(
concated, tf.pack([batch_size, num_classes]), 1.0, 0.0)
concated, tf.stack([batch_size, num_classes]), 1.0, 0.0)
onehot_labels.set_shape([batch_size, num_classes])
return onehot_labels
......
......@@ -240,7 +240,7 @@ def global_step(device=''):
# Get the device for the variable.
with tf.device(variable_device(device, 'global_step')):
return tf.get_variable('global_step', shape=[], dtype=tf.int64,
initializer=tf.zeros_initializer,
initializer=tf.zeros_initializer(),
trainable=False, collections=collections)
......
......@@ -64,7 +64,7 @@ class NamignizerModel(object):
(cell_output, state) = cell(inputs[:, time_step, :], state)
outputs.append(cell_output)
output = tf.reshape(tf.concat(1, outputs), [-1, size])
output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, size])
softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
softmax_b = tf.get_variable("softmax_b", [vocab_size])
logits = tf.matmul(output, softmax_w) + softmax_b
......
......@@ -36,7 +36,7 @@ def conv_linear(args, kw, kh, nin, nout, rate, do_bias, bias_start, prefix):
if len(args) == 1:
arg = args[0]
else:
arg = tf.concat(3, args)
arg = tf.concat(axis=3, values=args)
res = tf.nn.convolution(arg, k, dilation_rate=(rate, 1), padding="SAME")
if not do_bias: return res
with tf.device("/cpu:0"):
......@@ -71,14 +71,14 @@ def place_at14(decided, selected, it):
"""Place selected at it-th coordinate of decided, dim=1 of 4."""
slice1 = decided[:, :it, :, :]
slice2 = decided[:, it + 1:, :, :]
return tf.concat(1, [slice1, selected, slice2])
return tf.concat(axis=1, values=[slice1, selected, slice2])
def place_at13(decided, selected, it):
"""Place selected at it-th coordinate of decided, dim=1 of 3."""
slice1 = decided[:, :it, :]
slice2 = decided[:, it + 1:, :]
return tf.concat(1, [slice1, selected, slice2])
return tf.concat(axis=1, values=[slice1, selected, slice2])
def tanh_cutoff(x, cutoff):
......@@ -211,7 +211,7 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
# beam_val is [batch_size x beam_size]; let b = batch_size * beam_size
# decided is len x b x a x b
# output is b x out_size; step is b x len x a x b;
outputs = tf.split(0, beam_size, tf.nn.log_softmax(output))
outputs = tf.split(axis=0, num_or_size_splits=beam_size, value=tf.nn.log_softmax(output))
all_beam_vals, all_beam_idx = [], []
beam_range = 1 if is_first else beam_size
for i in xrange(beam_range):
......@@ -221,9 +221,9 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
cur_beam_val], "GREPO", summarize=8)
all_beam_vals.append(top_out + tf.expand_dims(cur_beam_val, 1))
all_beam_idx.append(top_out_idx)
all_beam_idx = tf.reshape(tf.transpose(tf.concat(1, all_beam_idx), [1, 0]),
all_beam_idx = tf.reshape(tf.transpose(tf.concat(axis=1, values=all_beam_idx), [1, 0]),
[-1])
top_beam, top_beam_idx = tf.nn.top_k(tf.concat(1, all_beam_vals), k=beam_size)
top_beam, top_beam_idx = tf.nn.top_k(tf.concat(axis=1, values=all_beam_vals), k=beam_size)
top_beam_idx = tf.Print(top_beam_idx, [top_beam, top_beam_idx],
"GREP", summarize=8)
reordered = [[] for _ in xrange(len(tensors_to_reorder) + 1)]
......@@ -236,8 +236,8 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
reordered[0].append(tf.gather(output, which_beam))
for i, t in enumerate(tensors_to_reorder):
reordered[i + 1].append(tf.gather(t, which_beam))
new_tensors = [tf.concat(0, t) for t in reordered]
top_out_idx = tf.concat(0, top_out_idx)
new_tensors = [tf.concat(axis=0, values=t) for t in reordered]
top_out_idx = tf.concat(axis=0, values=top_out_idx)
return (top_beam, new_tensors[0], top_out_idx, new_tensors[1:])
......@@ -266,9 +266,9 @@ class NeuralGPU(object):
self.input = tf.placeholder(tf.int32, name="inp")
self.target = tf.placeholder(tf.int32, name="tgt")
self.prev_step = tf.placeholder(tf.float32, name="prev_step")
gpu_input = tf.split(0, num_gpus, self.input)
gpu_target = tf.split(0, num_gpus, self.target)
gpu_prev_step = tf.split(0, num_gpus, self.prev_step)
gpu_input = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.input)
gpu_target = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.target)
gpu_prev_step = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.prev_step)
batch_size = tf.shape(gpu_input[0])[0]
if backward:
......@@ -410,7 +410,7 @@ class NeuralGPU(object):
out_write = output_ta.write(it, output_l[:batch_size, :, :, :])
output = tf.gather(target_emb_weights, out)
output = tf.reshape(output, [-1, 1, nmaps])
output = tf.concat(1, [output] * height)
output = tf.concat(axis=1, values=[output] * height)
tgt = tgts[it, :, :, :]
selected = tf.cond(tf.less(tf.random_uniform([]), self.sampling),
lambda: output, lambda: tgt)
......@@ -419,7 +419,7 @@ class NeuralGPU(object):
out_idx = place_at13(
out_idx, tf.reshape(out, [beam_size * batch_size, 1, 1]), it)
if mem_size > 0:
mem = tf.concat(2, [mem] * height)
mem = tf.concat(axis=2, values=[mem] * height)
dec_write = place_at14(dec_write, mem, it_incr)
return (step, dec_write, out_write, mloss + mem_loss, nupd_in + nupd,
out_idx, beam_cost)
......@@ -459,7 +459,7 @@ class NeuralGPU(object):
gpu_targets_tn)
embedded_targets_tn = tf.transpose(
embedded_targets_tn, [2, 0, 1, 3]) # len x b x 1 x nmaps
embedded_targets_tn = tf.concat(2, [embedded_targets_tn] * height)
embedded_targets_tn = tf.concat(axis=2, values=[embedded_targets_tn] * height)
# First image comes from start by applying convolution and adding 0s.
start = tf.transpose(start, [0, 2, 1, 3]) # Now b x len x h x vec_s
......@@ -505,7 +505,7 @@ class NeuralGPU(object):
attn_res = attention_query(attn_q, tf.get_variable(
"attn_v", [height * nmaps],
initializer=tf.random_uniform_initializer(-0.1, 0.1)))
concatenated = tf.reshape(tf.concat(1, [cell_inp, attn_res]),
concatenated = tf.reshape(tf.concat(axis=1, values=[cell_inp, attn_res]),
[batch_size, 2 * height * nmaps])
cell_inp = tf.layers.dense(
concatenated, height * nmaps, name="attn_merge")
......@@ -519,14 +519,14 @@ class NeuralGPU(object):
res = tf.gather(target_emb_weights, res)
res *= tf.expand_dims(mask[:, 0], 1)
output = tf.layers.dense(
tf.concat(1, [output, res]), height * nmaps, name="rnnmem")
tf.concat(axis=1, values=[output, res]), height * nmaps, name="rnnmem")
return new_state, output, mem_loss
# pylint: enable=cell-var-from-loop
gpu_targets = tf.squeeze(gpu_target[gpu], [1]) # b x len
gpu_tgt_trans = tf.transpose(gpu_targets, [1, 0])
dec_zero = tf.zeros([batch_size, 1], dtype=tf.int32)
dec_inp = tf.concat(1, [dec_zero, gpu_targets])
dec_inp = tf.concat(axis=1, values=[dec_zero, gpu_targets])
dec_inp = dec_inp[:, :length]
embedded_dec_inp = tf.gather(target_emb_weights, dec_inp)
embedded_dec_inp_proj = tf.layers.dense(
......@@ -573,9 +573,9 @@ class NeuralGPU(object):
height, vec_size])
# Prepare for beam search.
tgts = tf.concat(1, [embedded_targets_tn] * beam_size)
tgts = tf.concat(axis=1, values=[embedded_targets_tn] * beam_size)
beam_cost = tf.zeros([batch_size, beam_size])
step = tf.concat(0, [step] * beam_size)
step = tf.concat(axis=0, values=[step] * beam_size)
# First step hard-coded.
step, decided_t, output_ta, mem_loss, nupd, oi, bc = dec_step(
step, 0, 0, decided_t, output_ta, tgts, 0.0, 0, out_idx,
......@@ -654,7 +654,7 @@ class NeuralGPU(object):
% (gpu, time.time() - start_time))
self.updates = []
self.after_enc_step = tf.concat(0, self.after_enc_step) # Concat GPUs.
self.after_enc_step = tf.concat(axis=0, values=self.after_enc_step) # Concat GPUs.
if backward:
tf.get_variable_scope()._reuse = False
tf.get_variable_scope().set_caching_device(None)
......@@ -667,10 +667,10 @@ class NeuralGPU(object):
self.losses = [gpu_avg([gpu_losses[g][i] for g in xrange(num_gpus)])
for i in xrange(len(gpu_losses[0]))]
self.out_idx = tf.concat(0, gpu_out_idx)
self.out_idx = tf.concat(axis=0, values=gpu_out_idx)
self.grad_norms = [gpu_avg([gpu_grad_norms[g][i] for g in xrange(num_gpus)])
for i in xrange(len(gpu_grad_norms[0]))]
self.outputs = [tf.concat(1, [gpu_outputs[g] for g in xrange(num_gpus)])]
self.outputs = [tf.concat(axis=1, values=[gpu_outputs[g] for g in xrange(num_gpus)])]
self.quantize_op = quantize_weights_op(512, 8)
if backward:
self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
......
File mode changed from 100755 to 100644
......@@ -121,14 +121,14 @@ class Graph():
if (self.utility.FLAGS.rnn_dropout > 0.0):
question_hidden = question_hidden * rnn_dropout_mask
hidden_vectors.append(tf.expand_dims(question_hidden, 0))
hidden_vectors = tf.concat(0, hidden_vectors)
hidden_vectors = tf.concat(axis=0, values=hidden_vectors)
return question_hidden, hidden_vectors
def history_recurrent_step(self, curr_hprev, hprev):
#A single RNN step for controller or history RNN
return tf.tanh(
tf.matmul(
tf.concat(1, [hprev, curr_hprev]), self.params[
tf.concat(axis=1, values=[hprev, curr_hprev]), self.params[
"history_recurrent"])) + self.params["history_recurrent_bias"]
def question_number_softmax(self, hidden_vectors):
......@@ -150,13 +150,13 @@ class Graph():
tf.expand_dims(
tf.transpose(self.batch_ordinal_question_one), 2
), [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
question_number_softmax = tf.nn.softmax(tf.concat(1, [first, second]))
question_number_softmax = tf.nn.softmax(tf.concat(axis=1, values=[first, second]))
if (self.mode == "test"):
cond = tf.equal(question_number_softmax,
tf.reshape(
tf.reduce_max(question_number_softmax, 1),
[self.batch_size, 1]))
question_number_softmax = tf.select(
question_number_softmax = tf.where(
cond,
tf.fill(tf.shape(question_number_softmax), 1.0),
tf.fill(tf.shape(question_number_softmax), 0.0))
......@@ -164,7 +164,7 @@ class Graph():
self.data_type)
ans = tf.reshape(
tf.reduce_sum(question_number_softmax * tf.concat(
1, [self.batch_question_number, self.batch_question_number_one]),
axis=1, values=[self.batch_question_number, self.batch_question_number_one]),
1), [self.batch_size, 1])
return ans
......@@ -225,7 +225,7 @@ class Graph():
column_controller_vector = nn_utils.apply_dropout(
column_controller_vector, self.utility.FLAGS.dropout, self.mode)
self.full_column_hidden_vectors = tf.concat(
1, [self.column_hidden_vectors, self.word_column_hidden_vectors])
axis=1, values=[self.column_hidden_vectors, self.word_column_hidden_vectors])
self.full_column_hidden_vectors += self.summary_text_entry_embeddings
self.full_column_hidden_vectors = nn_utils.apply_dropout(
self.full_column_hidden_vectors, self.utility.FLAGS.dropout, self.mode)
......@@ -258,7 +258,7 @@ class Graph():
temp_ans.append(curr_prob)
else:
temp_ans.append(tf.zeros_like(curr_prob))
temp_ans = tf.transpose(tf.concat(0, temp_ans))
temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans))
answer += temp_ans
return answer
......@@ -266,7 +266,7 @@ class Graph():
#converts soft selection to hard selection. used at test time
cond = tf.equal(
softmax, tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1]))
softmax = tf.select(
softmax = tf.where(
cond, tf.fill(tf.shape(softmax), 1.0), tf.fill(tf.shape(softmax), 0.0))
softmax = tf.cast(softmax, self.data_type)
return softmax
......@@ -297,7 +297,7 @@ class Graph():
curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
curr_prob = curr_prob * tf.expand_dims(
tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
answer = tf.select(select_mask, curr_prob, answer)
answer = tf.where(select_mask, curr_prob, answer)
sum_prob += tf.reduce_sum(curr_prob, 2)
return answer
......@@ -335,11 +335,11 @@ class Graph():
1) #BS * max_elements
select_min = tf.reduce_sum(init_min * select_full_column_softmax,
1) #BS * max_elements
select_prev = tf.concat(1, [
select_prev = tf.concat(axis=1, values=[
tf.slice(select, [0, 1], [self.batch_size, self.max_elements - 1]),
tf.cast(tf.zeros([self.batch_size, 1]), self.data_type)
])
select_next = tf.concat(1, [
select_next = tf.concat(axis=1, values=[
tf.cast(tf.zeros([self.batch_size, 1]), self.data_type), tf.slice(
select, [0, 0], [self.batch_size, self.max_elements - 1])
])
......@@ -352,11 +352,11 @@ class Graph():
length_content = 1
length_select = 13
length_print = 1
values = tf.concat(1, [count])
values = tf.concat(axis=1, values=[count])
softmax_content = tf.slice(softmax, [0, 0],
[self.batch_size, length_content])
#compute scalar output
output = tf.reduce_sum(tf.mul(softmax_content, values), 1)
output = tf.reduce_sum(tf.multiply(softmax_content, values), 1)
#compute lookup answer
softmax_print = tf.slice(softmax, [0, length_content + length_select],
[self.batch_size, length_print])
......@@ -384,7 +384,7 @@ class Graph():
]
select = tf.reduce_sum(
tf.tile(tf.expand_dims(softmax_select, 2), [1, 1, self.max_elements]) *
tf.concat(1, select_lists), 1)
tf.concat(axis=1, values=select_lists), 1)
select = select * self.select_whole_mask
return output, select
......@@ -396,11 +396,11 @@ class Graph():
self.batch_question_attention_mask) #batch_size * embedding_dims
controller_vector = tf.nn.relu(
tf.matmul(hprev, self.params["controller_prev"]) + tf.matmul(
tf.concat(1, [question_embedding, attention_vector]), self.params[
tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
"controller"]))
column_controller_vector = tf.nn.relu(
tf.matmul(hprev, self.params["column_controller_prev"]) + tf.matmul(
tf.concat(1, [question_embedding, attention_vector]), self.params[
tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
"column_controller"]))
controller_vector = nn_utils.apply_dropout(
controller_vector, self.utility.FLAGS.dropout, self.mode)
......@@ -413,7 +413,7 @@ class Graph():
tf.matmul(tf.transpose(self.params_unit), tf.transpose(softmax)))
column_controller_vector = tf.nn.relu(
tf.matmul(
tf.concat(1, [
tf.concat(axis=1, values=[
column_controller_vector, weighted_op_representation
]), self.params["break_conditional"]))
full_column_softmax = self.compute_column_softmax(column_controller_vector,
......@@ -429,7 +429,7 @@ class Graph():
def compute_lookup_error(self, val):
#computes lookup error.
cond = tf.equal(self.batch_print_answer, val)
inter = tf.select(
inter = tf.where(
cond, self.init_print_error,
tf.tile(
tf.reshape(tf.constant(1e10, self.data_type), [1, 1, 1]), [
......@@ -450,12 +450,12 @@ class Graph():
def error_computation(self):
#computes the error of each example in a batch
math_error = 0.5 * tf.square(tf.sub(self.scalar_output, self.batch_answer))
math_error = 0.5 * tf.square(tf.subtract(self.scalar_output, self.batch_answer))
#scale math error
math_error = math_error / self.rows
math_error = tf.minimum(math_error, self.utility.FLAGS.max_math_error *
tf.ones(tf.shape(math_error), self.data_type))
self.init_print_error = tf.select(
self.init_print_error = tf.where(
self.batch_gold_select, -1 * tf.log(self.batch_lookup_answer + 1e-300 +
self.invert_select_full_mask), -1 *
tf.log(1 - self.batch_lookup_answer)) * self.select_full_mask
......@@ -466,24 +466,24 @@ class Graph():
print_error += self.compute_lookup_error(val + 0.0)
print_error = print_error * self.utility.FLAGS.print_cost / self.num_entries
if (self.mode == "train"):
error = tf.select(
error = tf.where(
tf.logical_and(
tf.not_equal(self.batch_answer, 0.0),
tf.not_equal(
tf.reduce_sum(tf.reduce_sum(self.batch_print_answer, 1), 1),
0.0)),
self.soft_min(math_error, print_error),
tf.select(
tf.where(
tf.not_equal(self.batch_answer, 0.0), math_error, print_error))
else:
error = tf.select(
error = tf.where(
tf.logical_and(
tf.equal(self.scalar_output, 0.0),
tf.equal(
tf.reduce_sum(tf.reduce_sum(self.batch_lookup_answer, 1), 1),
0.0)),
tf.ones_like(math_error),
tf.select(
tf.where(
tf.equal(self.scalar_output, 0.0), print_error, math_error))
return error
......@@ -558,7 +558,7 @@ class Graph():
input_col = tf.reduce_sum(
tf.expand_dims(soft_column_softmax, 2) *
self.full_column_hidden_vectors, 1)
history_input = tf.concat(1, [input_op, input_col])
history_input = tf.concat(axis=1, values=[input_op, input_col])
history_input = nn_utils.apply_dropout(
history_input, self.utility.FLAGS.dropout, self.mode)
hprev = self.history_recurrent_step(history_input, hprev)
......@@ -567,7 +567,7 @@ class Graph():
self.scalar_output = output
error = self.error_computation()
cond = tf.less(error, 0.0001, name="cond")
correct_add = tf.select(
correct_add = tf.where(
cond, tf.fill(tf.shape(cond), 1.0), tf.fill(tf.shape(cond), 0.0))
correct = tf.reduce_sum(correct_add)
error = error / batch_size
......@@ -579,11 +579,11 @@ class Graph():
#Sets mask variables and performs batch processing
self.batch_gold_select = self.batch_print_answer > 0.0
self.full_column_mask = tf.concat(
1, [self.batch_number_column_mask, self.batch_word_column_mask])
axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask])
self.full_processed_column = tf.concat(
1,
[self.batch_processed_number_column, self.batch_processed_word_column])
self.full_processed_sorted_index_column = tf.concat(1, [
axis=1,
values=[self.batch_processed_number_column, self.batch_processed_word_column])
self.full_processed_sorted_index_column = tf.concat(axis=1, values=[
self.batch_processed_sorted_index_number_column,
self.batch_processed_sorted_index_word_column
])
......@@ -603,7 +603,7 @@ class Graph():
tf.equal(self.batch_word_column_entry_mask,
self.utility.dummy_token_id)), self.data_type)
self.select_full_mask = tf.concat(
1, [self.select_mask, self.select_word_mask])
axis=1, values=[self.select_mask, self.select_word_mask])
self.select_whole_mask = tf.maximum(
tf.reshape(
tf.slice(self.select_mask, [0, 0, 0],
......@@ -614,7 +614,7 @@ class Graph():
[self.batch_size, 1, self.max_elements]),
[self.batch_size, self.max_elements]))
self.invert_select_full_mask = tf.cast(
tf.concat(1, [
tf.concat(axis=1, values=[
tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int),
tf.equal(self.batch_word_column_entry_mask,
self.utility.dummy_token_id)
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -65,7 +65,7 @@ class CrossConvModel(object):
diff = diff * 2.0 - self.params['scale']
diff_output = self.diff_output * 2.0 - self.params['scale']
concat_image = tf.concat(
1, [image, image + diff_output, image + diff, diff_output])
axis=1, values=[image, image + diff_output, image + diff, diff_output])
tf.summary.image('origin_predict_expect_predictdiff', concat_image)
self.summary_op = tf.summary.merge_all()
return self.loss
......@@ -113,7 +113,7 @@ class CrossConvModel(object):
assert shape[1] == shape[2] and shape[1] == 128
batch_size = shape[0]
net = tf.concat(3, [image, diff])
net = tf.concat(axis=3, values=[image, diff])
with tf.variable_scope('motion_encoder'):
with slim.arg_scope([slim.conv2d], padding='VALID'):
net = slim.conv2d(net, 96, [5, 5], stride=1)
......@@ -128,7 +128,7 @@ class CrossConvModel(object):
z = tf.reshape(net, shape=[batch_size, -1])
self.z_mean, self.z_stddev_log = tf.split(
split_dim=1, num_split=2, value=z)
axis=1, num_or_size_splits=2, value=z)
self.z_stddev = tf.exp(self.z_stddev_log)
epsilon = tf.random_normal(
......@@ -174,7 +174,7 @@ class CrossConvModel(object):
def _CrossConv(self, encoded_images):
"""Apply the motion kernel on the encoded_images."""
cross_conved_images = []
kernels = tf.split(split_dim=3, num_split=4, value=self.kernel)
kernels = tf.split(axis=3, num_or_size_splits=4, value=self.kernel)
for (i, encoded_image) in enumerate(encoded_images):
with tf.variable_scope('cross_conv_%d' % i):
kernel = kernels[i]
......@@ -187,7 +187,7 @@ class CrossConvModel(object):
for j in xrange(len(encoded_image)):
conved_image.append(self._CrossConvHelper(
encoded_image[j], kernel[j]))
cross_conved_images.append(tf.concat(0, conved_image))
cross_conved_images.append(tf.concat(axis=0, values=conved_image))
sys.stderr.write('cross_conved shape: %s\n' %
cross_conved_images[-1].get_shape())
return cross_conved_images
......@@ -224,7 +224,7 @@ class CrossConvModel(object):
nets.append(self._Deconv(
cross_conved_image, 64, kernel_size=3, stride=stride))
net = tf.concat(3, nets)
net = tf.concat(axis=3, values=nets)
net = slim.conv2d(net, 128, [9, 9], padding='SAME', stride=1)
net = slim.conv2d(net, 128, [1, 1], padding='SAME', stride=1)
net = slim.conv2d(net, 3, [1, 1], padding='SAME', stride=1)
......
......@@ -42,7 +42,7 @@ def SequenceToImageAndDiff(images):
for i in xrange(0, len(resized_images)-1):
diffs.append(resized_images[i+1] - resized_images[i])
image_diff_list.append(
(tf.concat(0, resized_images[:-1]), tf.concat(0, diffs)))
(tf.concat(axis=0, values=resized_images[:-1]), tf.concat(axis=0, values=diffs)))
return image_diff_list
......
......@@ -332,7 +332,7 @@ def masked_conv_aff_coupling(input_, mask_in, dim, name,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
mask = tf.mod(mask_channel + mask, 2)
res = tf.split(3, 2, res)
res = tf.split(axis=3, num_or_size_splits=2, value=res)
shift, log_rescaling = res[-2], res[-1]
scale = variable_on_cpu(
"rescaling_scale", [],
......@@ -486,9 +486,9 @@ def conv_ch_aff_coupling(input_, dim, name,
scope.reuse_variables()
if change_bottom:
input_, canvas = tf.split(3, 2, input_)
input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
else:
canvas, input_ = tf.split(3, 2, input_)
canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
......@@ -509,7 +509,7 @@ def conv_ch_aff_coupling(input_, dim, name,
train=train, weight_norm=weight_norm,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
shift, log_rescaling = tf.split(3, 2, res)
shift, log_rescaling = tf.split(axis=3, num_or_size_splits=2, value=res)
scale = variable_on_cpu(
"scale", [],
tf.constant_initializer(1.))
......@@ -570,9 +570,9 @@ def conv_ch_add_coupling(input_, dim, name,
scope.reuse_variables()
if change_bottom:
input_, canvas = tf.split(3, 2, input_)
input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
else:
canvas, input_ = tf.split(3, 2, input_)
canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
shape = input_.get_shape().as_list()
channels = shape[3]
res = input_
......@@ -736,8 +736,8 @@ def rec_masked_conv_coupling(input_, hps, scale_idx, n_scale,
log_diff_1 = log_diff[:, :, :, :channels]
log_diff_2 = log_diff[:, :, :, channels:]
else:
res_1, res_2 = tf.split(3, 2, res)
log_diff_1, log_diff_2 = tf.split(3, 2, log_diff)
res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
res_1, inc_log_diff = rec_masked_conv_coupling(
input_=res_1, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
......@@ -798,8 +798,8 @@ def rec_masked_deconv_coupling(input_, hps, scale_idx, n_scale,
log_diff_1 = log_diff[:, :, :, :channels]
log_diff_2 = log_diff[:, :, :, channels:]
else:
res_1, res_2 = tf.split(3, 2, res)
log_diff_1, log_diff_2 = tf.split(3, 2, log_diff)
res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
res_1, log_diff_1 = rec_masked_deconv_coupling(
input_=res_1, hps=hps,
scale_idx=scale_idx + 1, n_scale=n_scale,
......@@ -1305,7 +1305,7 @@ class RealNVP(object):
z_lost = z_complete
for scale_idx in xrange(hps.n_scale - 1):
z_lost = squeeze_2x2_ordered(z_lost)
z_lost, _ = tf.split(3, 2, z_lost)
z_lost, _ = tf.split(axis=3, num_or_size_splits=2, value=z_lost)
z_compressed = z_lost
z_noisy = z_lost
for _ in xrange(scale_idx + 1):
......
......@@ -99,8 +99,8 @@ def conv_layer(input_,
filter_size[1] - input_.get_shape().as_list()[2],
input_.get_shape().as_list()[3]
])
res = tf.concat(1, [pad_1, res])
res = tf.concat(2, [pad_2, res])
res = tf.concat(axis=1, values=[pad_1, res])
res = tf.concat(axis=2, values=[pad_2, res])
res = tf.nn.conv2d(
input=res,
filter=weights,
......@@ -139,8 +139,8 @@ def depool_2x2(input_, stride=2):
channels = shape[3]
res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels])
res = tf.concat(
2, [res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
res = tf.concat(4, [
axis=2, values=[res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
res = tf.concat(axis=4, values=[
res, tf.zeros([batch_size, height, stride, width, stride - 1, channels])
])
res = tf.reshape(res, [batch_size, stride * height, stride * width, channels])
......@@ -158,11 +158,11 @@ def batch_random_flip(input_):
height = shape[1]
width = shape[2]
channels = shape[3]
res = tf.split(0, batch_size, input_)
res = tf.split(axis=0, num_or_size_splits=batch_size, value=input_)
res = [elem[0, :, :, :] for elem in res]
res = [tf.image.random_flip_left_right(elem) for elem in res]
res = [tf.reshape(elem, [1, height, width, channels]) for elem in res]
res = tf.concat(0, res)
res = tf.concat(axis=0, values=res)
return res
......@@ -175,7 +175,7 @@ def as_one_hot(input_, n_indices):
n_elem = numpy.prod(shape)
indices = tf.range(n_elem)
indices = tf.cast(indices, tf.int64)
indices_input = tf.concat(0, [indices, tf.reshape(input_, [-1])])
indices_input = tf.concat(axis=0, values=[indices, tf.reshape(input_, [-1])])
indices_input = tf.reshape(indices_input, [2, -1])
indices_input = tf.transpose(indices_input)
res = tf.sparse_to_dense(
......
......@@ -232,10 +232,10 @@ def _gather_clone_loss(clone, num_clones, regularization_losses):
sum_loss = tf.add_n(all_losses)
# Add the summaries out of the clone device block.
if clone_loss is not None:
tf.scalar_summary(clone.scope + '/clone_loss', clone_loss,
tf.summary.scalar(clone.scope + '/clone_loss', clone_loss,
name='clone_loss')
if regularization_loss is not None:
tf.scalar_summary('regularization_loss', regularization_loss,
tf.summary.scalar('regularization_loss', regularization_loss,
name='regularization_loss')
return sum_loss
......@@ -404,12 +404,12 @@ def deploy(config,
if total_loss is not None:
# Add total_loss to summary.
summaries.add(tf.scalar_summary('total_loss', total_loss,
summaries.add(tf.summary.scalar('total_loss', total_loss,
name='total_loss'))
if summaries:
# Merge all summaries together.
summary_op = tf.merge_summary(list(summaries), name='summary_op')
summary_op = tf.summary.merge(list(summaries), name='summary_op')
else:
summary_op = None
......@@ -467,9 +467,9 @@ def _add_gradients_summaries(grads_and_vars):
grad_values = grad.values
else:
grad_values = grad
summaries.append(tf.histogram_summary(var.op.name + ':gradient',
summaries.append(tf.summary.histogram(var.op.name + ':gradient',
grad_values))
summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',
summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
tf.global_norm([grad_values])))
else:
tf.logging.info('Var %s has no gradient', var.op.name)
......
......@@ -160,7 +160,7 @@ def main(_):
# Print the summaries to screen.
for name, value in names_to_values.iteritems():
summary_name = 'eval/%s' % name
op = tf.scalar_summary(summary_name, value, collections=[])
op = tf.summary.scalar(summary_name, value, collections=[])
op = tf.Print(op, [value], summary_name)
tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
......
......@@ -113,7 +113,7 @@ def alexnet_v2(inputs,
net = slim.conv2d(net, num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
biases_initializer=tf.zeros_initializer,
biases_initializer=tf.zeros_initializer(),
scope='fc8')
# Convert end_points_collection into a end_point dict.
......
......@@ -77,7 +77,7 @@ def cifarnet(images, num_classes=10, is_training=False,
net = slim.fully_connected(net, 192, scope='fc4')
end_points['fc4'] = net
logits = slim.fully_connected(net, num_classes,
biases_initializer=tf.zeros_initializer,
biases_initializer=tf.zeros_initializer(),
weights_initializer=trunc_normal(1/192.0),
weights_regularizer=None,
activation_fn=None,
......
......@@ -93,7 +93,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -110,7 +110,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -132,7 +132,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -149,7 +149,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -166,7 +166,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -183,7 +183,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -200,7 +200,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -222,7 +222,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
......@@ -239,7 +239,7 @@ def inception_v1_base(inputs,
with tf.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
raise ValueError('Unknown final endpoint %s' % final_endpoint)
......
......@@ -145,7 +145,7 @@ def inception_v2_base(inputs,
branch_3, depth(32), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 28 x 28 x 256
......@@ -175,7 +175,7 @@ def inception_v2_base(inputs,
branch_3, depth(64), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 28 x 28 x 320
......@@ -200,7 +200,7 @@ def inception_v2_base(inputs,
with tf.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(
net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
net = tf.concat(3, [branch_0, branch_1, branch_2])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
......@@ -230,7 +230,7 @@ def inception_v2_base(inputs,
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
......@@ -260,7 +260,7 @@ def inception_v2_base(inputs,
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
......@@ -290,7 +290,7 @@ def inception_v2_base(inputs,
branch_3, depth(96), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
......@@ -321,7 +321,7 @@ def inception_v2_base(inputs,
branch_3, depth(96), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
......@@ -346,7 +346,7 @@ def inception_v2_base(inputs,
with tf.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
scope='MaxPool_1a_3x3')
net = tf.concat(3, [branch_0, branch_1, branch_2])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 7 x 7 x 1024
......@@ -376,7 +376,7 @@ def inception_v2_base(inputs,
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
......@@ -407,7 +407,7 @@ def inception_v2_base(inputs,
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
raise ValueError('Unknown final endpoint %s' % final_endpoint)
......