Merge pull request #1 from tensorflow/master

update to tensorflow/model master

Merge pull request #1 from tensorflow/master
update to tensorflow/model master
68a18b70 · Toby Boyd · GitHub · bc70271a · 2c4fea8d · 68a18b70
Commit 68a18b70 authored Jun 08, 2017 by Toby Boyd Committed by GitHub Jun 08, 2017
20 changed files
--- a/attention_ocr/python/model_test.py
+++ b/attention_ocr/python/model_test.py
+# Copyright 2017 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for the model."""
+
+import numpy as np
+import string
+import tensorflow as tf
+from tensorflow.contrib import slim
+from tensorflow.contrib.tfprof import model_analyzer
+
+import model
+import data_provider
+
+
+def create_fake_charset(num_char_classes):
+  charset = {}
+  for i in xrange(num_char_classes):
+    charset[i] = string.printable[i % len(string.printable)]
+  return charset
+
+
+class ModelTest(tf.test.TestCase):
+  def setUp(self):
+    tf.test.TestCase.setUp(self)
+
+    self.rng = np.random.RandomState([11, 23, 50])
+
+    self.batch_size = 4
+    self.image_width = 600
+    self.image_height = 30
+    self.seq_length = 40
+    self.num_char_classes = 72
+    self.null_code = 62
+    self.num_views = 4
+
+    feature_size = 288
+    self.conv_tower_shape = (self.batch_size, 1, 72, feature_size)
+    self.features_shape = (self.batch_size, self.seq_length, feature_size)
+    self.chars_logit_shape = (self.batch_size, self.seq_length,
+                              self.num_char_classes)
+    self.length_logit_shape = (self.batch_size, self.seq_length + 1)
+
+    self.initialize_fakes()
+
+  def initialize_fakes(self):
+    self.images_shape = (self.batch_size, self.image_height, self.image_width,
+                         3)
+    self.fake_images = tf.constant(
+        self.rng.randint(low=0, high=255,
+                         size=self.images_shape).astype('float32'),
+        name='input_node')
+    self.fake_conv_tower_np = tf.constant(
+        self.rng.randn(*self.conv_tower_shape).astype('float32'))
+    self.fake_logits = tf.constant(
+        self.rng.randn(*self.chars_logit_shape).astype('float32'))
+    self.fake_labels = tf.constant(
+        self.rng.randint(
+            low=0,
+            high=self.num_char_classes,
+            size=(self.batch_size, self.seq_length)).astype('int64'))
+
+  def create_model(self):
+    return model.Model(
+        self.num_char_classes, self.seq_length, num_views=4, null_code=62)
+
+  def test_char_related_shapes(self):
+    ocr_model = self.create_model()
+    with self.test_session() as sess:
+      endpoints_tf = ocr_model.create_base(
+          images=self.fake_images, labels_one_hot=None)
+
+      sess.run(tf.global_variables_initializer())
+      endpoints = sess.run(endpoints_tf)
+
+      self.assertEqual((self.batch_size, self.seq_length,
+                        self.num_char_classes), endpoints.chars_logit.shape)
+      self.assertEqual((self.batch_size, self.seq_length,
+                        self.num_char_classes), endpoints.chars_log_prob.shape)
+      self.assertEqual((self.batch_size, self.seq_length),
+                       endpoints.predicted_chars.shape)
+      self.assertEqual((self.batch_size, self.seq_length),
+                       endpoints.predicted_scores.shape)
+
+  def test_predicted_scores_are_within_range(self):
+    ocr_model = self.create_model()
+
+    _, _, scores = ocr_model.char_predictions(self.fake_logits)
+    with self.test_session() as sess:
+      scores_np = sess.run(scores)
+
+    values_in_range = (scores_np >= 0.0) & (scores_np <= 1.0)
+    self.assertTrue(
+        np.all(values_in_range),
+        msg=('Scores contains out of the range values %s' %
+             scores_np[np.logical_not(values_in_range)]))
+
+  def test_conv_tower_shape(self):
+    with self.test_session() as sess:
+      ocr_model = self.create_model()
+      conv_tower = ocr_model.conv_tower_fn(self.fake_images)
+
+      sess.run(tf.global_variables_initializer())
+      conv_tower_np = sess.run(conv_tower)
+
+      self.assertEqual(self.conv_tower_shape, conv_tower_np.shape)
+
+  def test_model_size_less_then1_gb(self):
+    # NOTE: The actual amount of memory occupied by TF during training will be
+    # at least 4X bigger because of the space needed to store original weights,
+    # updates, gradients and variances. It also depends on the type of
+    # optimizer used.
+    ocr_model = self.create_model()
+    ocr_model.create_base(images=self.fake_images, labels_one_hot=None)
+    with self.test_session() as sess:
+      tfprof_root = model_analyzer.print_model_analysis(
+          sess.graph,
+          tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
+
+      model_size_bytes = 4 * tfprof_root.total_parameters
+      self.assertLess(model_size_bytes, 1 * 2**30)
+
+  def test_create_summaries_is_runnable(self):
+    ocr_model = self.create_model()
+    data = data_provider.InputEndpoints(
+        images=self.fake_images,
+        images_orig=self.fake_images,
+        labels=self.fake_labels,
+        labels_one_hot=slim.one_hot_encoding(self.fake_labels,
+                                             self.num_char_classes))
+    endpoints = ocr_model.create_base(
+        images=self.fake_images, labels_one_hot=None)
+    charset = create_fake_charset(self.num_char_classes)
+    summaries = ocr_model.create_summaries(
+        data, endpoints, charset, is_training=False)
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(tf.local_variables_initializer())
+      tf.tables_initializer().run()
+      sess.run(summaries)  # just check it is runnable
+
+  def test_sequence_loss_function_without_label_smoothing(self):
+    model = self.create_model()
+    model.set_mparam('sequence_loss_fn', label_smoothing=0)
+
+    loss = model.sequence_loss_fn(self.fake_logits, self.fake_labels)
+    with self.test_session() as sess:
+      loss_np = sess.run(loss)
+
+    # This test checks that the loss function is 'runnable'.
+    self.assertEqual(loss_np.shape, tuple())
+
+
+class CharsetMapperTest(tf.test.TestCase):
+  def test_text_corresponds_to_ids(self):
+    charset = create_fake_charset(36)
+    ids = tf.constant(
+        [[17, 14, 21, 21, 24], [32, 24, 27, 21, 13]], dtype=tf.int64)
+    charset_mapper = model.CharsetMapper(charset)
+
+    with self.test_session() as sess:
+      tf.tables_initializer().run()
+      text = sess.run(charset_mapper.get_text(ids))
+
+    self.assertAllEqual(text, ['hello', 'world'])
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/attention_ocr/python/sequence_layers.py
+++ b/attention_ocr/python/sequence_layers.py
+# Copyright 2017 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Various implementations of sequence layers for character prediction.
+
+A 'sequence layer' is a part of a computation graph which is responsible for
+producing a sequence of characters using extracted image features. There are
+many reasonable ways to implement such layers. All of them use RNNs.
+This module provides implementations which use an 'attention' mechanism to
+spatially 'pool' image features and can also use a previously predicted
+character to predict the next one (aka auto regression).
+
+Usage:
+  Select one of available classes, e.g. Attention or use a wrapper function to
+  pick one based on your requirements:
+  layer_class = sequence_layers.get_layer_class(use_attention=True,
+                                                use_autoregression=True)
+  layer = layer_class(net, labels_one_hot, model_params, method_params)
+  char_logits = layer.create_logits()
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import abc
+import logging
+import numpy as np
+
+import tensorflow as tf
+
+from tensorflow.contrib import slim
+
+
+def orthogonal_initializer(shape, dtype=tf.float32, *args, **kwargs):
+  """Generates orthonormal matrices with random values.
+
+  Orthonormal initialization is important for RNNs:
+    http://arxiv.org/abs/1312.6120
+    http://smerity.com/articles/2016/orthogonal_init.html
+
+  For non-square shapes the returned matrix will be semi-orthonormal: if the
+  number of columns exceeds the number of rows, then the rows are orthonormal
+  vectors; but if the number of rows exceeds the number of columns, then the
+  columns are orthonormal vectors.
+
+  We use SVD decomposition to generate an orthonormal matrix with random
+  values. The same way as it is done in the Lasagne library for Theano. Note
+  that both u and v returned by the svd are orthogonal and random. We just need
+  to pick one with the right shape.
+
+  Args:
+    shape: a shape of the tensor matrix to initialize.
+    dtype: a dtype of the initialized tensor.
+    *args: not used.
+    **kwargs: not used.
+
+  Returns:
+    An initialized tensor.
+  """
+  del args
+  del kwargs
+  flat_shape = (shape[0], np.prod(shape[1:]))
+  w = np.random.randn(*flat_shape)
+  u, _, v = np.linalg.svd(w, full_matrices=False)
+  w = u if u.shape == flat_shape else v
+  return tf.constant(w.reshape(shape), dtype=dtype)
+
+
+SequenceLayerParams = collections.namedtuple('SequenceLogitsParams', [
+    'num_lstm_units', 'weight_decay', 'lstm_state_clip_value'
+])
+
+
+class SequenceLayerBase(object):
+  """A base abstract class for all sequence layers.
+
+  A child class has to define the following methods:
+    get_train_input
+    get_eval_input
+    unroll_cell
+  """
+  __metaclass__ = abc.ABCMeta
+
+  def __init__(self, net, labels_one_hot, model_params, method_params):
+    """Stores arguments in member variables for further use.
+
+    Args:
+      net: A tensor with shape [batch_size, num_features, feature_size] which
+        contains some extracted image features.
+      labels_one_hot: An optional (can be None) ground truth labels for the
+        input features. Is a tensor with shape
+        [batch_size, seq_length, num_char_classes]
+      model_params: A namedtuple with model parameters (model.ModelParams).
+      method_params: A SequenceLayerParams instance.
+    """
+    self._params = model_params
+    self._mparams = method_params
+    self._net = net
+    self._labels_one_hot = labels_one_hot
+    self._batch_size = net.get_shape().dims[0].value
+
+    # Initialize parameters for char logits which will be computed on the fly
+    # inside an LSTM decoder.
+    self._char_logits = {}
+    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
+    self._softmax_w = slim.model_variable(
+        'softmax_w',
+        [self._mparams.num_lstm_units, self._params.num_char_classes],
+        initializer=orthogonal_initializer,
+        regularizer=regularizer)
+    self._softmax_b = slim.model_variable(
+        'softmax_b', [self._params.num_char_classes],
+        initializer=tf.zeros_initializer(),
+        regularizer=regularizer)
+
+  @abc.abstractmethod
+  def get_train_input(self, prev, i):
+    """Returns a sample to be used to predict a character during training.
+
+    This function is used as a loop_function for an RNN decoder.
+
+    Args:
+      prev: output tensor from previous step of the RNN. A tensor with shape:
+        [batch_size, num_char_classes].
+      i: index of a character in the output sequence.
+
+    Returns:
+      A tensor with shape [batch_size, ?] - depth depends on implementation
+      details.
+    """
+    pass
+
+  @abc.abstractmethod
+  def get_eval_input(self, prev, i):
+    """Returns a sample to be used to predict a character during inference.
+
+    This function is used as a loop_function for an RNN decoder.
+
+    Args:
+      prev: output tensor from previous step of the RNN. A tensor with shape:
+        [batch_size, num_char_classes].
+      i: index of a character in the output sequence.
+
+    Returns:
+      A tensor with shape [batch_size, ?] - depth depends on implementation
+      details.
+    """
+    raise AssertionError('Not implemented')
+
+  @abc.abstractmethod
+  def unroll_cell(self, decoder_inputs, initial_state, loop_function, cell):
+    """Unrolls an RNN cell for all inputs.
+
+    This is a placeholder to call some RNN decoder. Its interface is similar
+    to that of tf.seq2seq.rnn_decode.
+
+    Args:
+      decoder_inputs: A list of 2D Tensors* [batch_size x input_size]. In fact,
+        most of existing decoders in presence of a loop_function use only the
+        first element to determine batch_size and length of the list to
+        determine number of steps.
+      initial_state: 2D Tensor with shape [batch_size x cell.state_size].
+      loop_function: function will be applied to the i-th output in order to
+        generate the i+1-st input (see self.get_input).
+      cell: rnn_cell.RNNCell defining the cell function and size.
+
+    Returns:
+      A tuple of the form (outputs, state), where:
+        outputs: A list of character logits of the same length as
+        decoder_inputs of 2D Tensors with shape [batch_size x num_characters].
+        state: The state of each cell at the final time-step.
+          It is a 2D Tensor of shape [batch_size x cell.state_size].
+    """
+    pass
+
+  def is_training(self):
+    """Returns True if the layer is created for training stage."""
+    return self._labels_one_hot is not None
+
+  def char_logit(self, inputs, char_index):
+    """Creates logits for a character if required.
+
+    Args:
+      inputs: A tensor with shape [batch_size, ?] (depth is implementation
+        dependent).
+      char_index: An integer index of a character in the output sequence.
+
+    Returns:
+      A tensor with shape [batch_size, num_char_classes]
+    """
+    if char_index not in self._char_logits:
+      self._char_logits[char_index] = tf.nn.xw_plus_b(inputs, self._softmax_w,
+                                                      self._softmax_b)
+    return self._char_logits[char_index]
+
+  def char_one_hot(self, logit):
+    """Creates one hot encoding for a logit of a character.
+
+    Args:
+      logit: A tensor with shape [batch_size, num_char_classes].
+
+    Returns:
+      A tensor with shape [batch_size, num_char_classes]
+    """
+    prediction = tf.argmax(logit, dimension=1)
+    return slim.one_hot_encoding(prediction, self._params.num_char_classes)
+
+  def get_input(self, prev, i):
+    """A wrapper for get_train_input and get_eval_input.
+
+    Args:
+      prev: output tensor from previous step of the RNN. A tensor with shape:
+        [batch_size, num_char_classes].
+      i: index of a character in the output sequence.
+
+    Returns:
+      A tensor with shape [batch_size, ?] - depth depends on implementation
+      details.
+    """
+    if self.is_training():
+      return self.get_train_input(prev, i)
+    else:
+      return self.get_eval_input(prev, i)
+
+  def create_logits(self):
+    """Creates character sequence logits for a net specified in the constructor.
+
+    A "main" method for the sequence layer which glues together all pieces.
+
+    Returns:
+      A tensor with shape [batch_size, seq_length, num_char_classes].
+    """
+    with tf.variable_scope('LSTM'):
+      first_label = self.get_input(prev=None, i=0)
+      decoder_inputs = [first_label] + [None] * (self._params.seq_length - 1)
+      lstm_cell = tf.contrib.rnn.LSTMCell(
+          self._mparams.num_lstm_units,
+          use_peepholes=False,
+          cell_clip=self._mparams.lstm_state_clip_value,
+          state_is_tuple=True,
+          initializer=orthogonal_initializer)
+      lstm_outputs, _ = self.unroll_cell(
+          decoder_inputs=decoder_inputs,
+          initial_state=lstm_cell.zero_state(self._batch_size, tf.float32),
+          loop_function=self.get_input,
+          cell=lstm_cell)
+
+    with tf.variable_scope('logits'):
+      logits_list = [
+          tf.expand_dims(self.char_logit(logit, i), dim=1)
+          for i, logit in enumerate(lstm_outputs)
+      ]
+
+    return tf.concat(logits_list, 1)
+
+
+class NetSlice(SequenceLayerBase):
+  """A layer which uses a subset of image features to predict each character.
+  """
+
+  def __init__(self, *args, **kwargs):
+    super(NetSlice, self).__init__(*args, **kwargs)
+    self._zero_label = tf.zeros(
+        [self._batch_size, self._params.num_char_classes])
+
+  def get_image_feature(self, char_index):
+    """Returns a subset of image features for a character.
+
+    Args:
+      char_index: an index of a character.
+
+    Returns:
+      A tensor with shape [batch_size, ?]. The output depth depends on the
+      depth of input net.
+    """
+    batch_size, features_num, _ = [d.value for d in self._net.get_shape()]
+    slice_len = int(features_num / self._params.seq_length)
+    # When features_num != seq_length, we just pick a subset of image
+    # features, this choice is arbitrary and there is no intuitive geometrical
+    # interpretation. If features_num is not divisible by seq_length there will
+    # be unused image features.
+    net_slice = self._net[:, char_index:char_index + slice_len, :]
+    feature = tf.reshape(net_slice, [batch_size, -1])
+    logging.debug('Image feature: %s', feature)
+    return feature
+
+  def get_eval_input(self, prev, i):
+    """See SequenceLayerBase.get_eval_input for details."""
+    del prev
+    return self.get_image_feature(i)
+
+  def get_train_input(self, prev, i):
+    """See SequenceLayerBase.get_train_input for details."""
+    return self.get_eval_input(prev, i)
+
+  def unroll_cell(self, decoder_inputs, initial_state, loop_function, cell):
+    """See SequenceLayerBase.unroll_cell for details."""
+    return tf.contrib.legacy_seq2seq.rnn_decoder(
+        decoder_inputs=decoder_inputs,
+        initial_state=initial_state,
+        cell=cell,
+        loop_function=self.get_input)
+
+
+class NetSliceWithAutoregression(NetSlice):
+  """A layer similar to NetSlice, but it also uses auto regression.
+
+  The "auto regression" means that we use network output for previous character
+  as a part of input for the current character.
+  """
+
+  def __init__(self, *args, **kwargs):
+    super(NetSliceWithAutoregression, self).__init__(*args, **kwargs)
+
+  def get_eval_input(self, prev, i):
+    """See SequenceLayerBase.get_eval_input for details."""
+    if i == 0:
+      prev = self._zero_label
+    else:
+      logit = self.char_logit(prev, char_index=i - 1)
+      prev = self.char_one_hot(logit)
+    image_feature = self.get_image_feature(char_index=i)
+    return tf.concat([image_feature, prev], 1)
+
+  def get_train_input(self, prev, i):
+    """See SequenceLayerBase.get_train_input for details."""
+    if i == 0:
+      prev = self._zero_label
+    else:
+      prev = self._labels_one_hot[:, i - 1, :]
+    image_feature = self.get_image_feature(i)
+    return tf.concat([image_feature, prev], 1)
+
+
+class Attention(SequenceLayerBase):
+  """A layer which uses attention mechanism to select image features."""
+
+  def __init__(self, *args, **kwargs):
+    super(Attention, self).__init__(*args, **kwargs)
+    self._zero_label = tf.zeros(
+        [self._batch_size, self._params.num_char_classes])
+
+  def get_eval_input(self, prev, i):
+    """See SequenceLayerBase.get_eval_input for details."""
+    del prev, i
+    # The attention_decoder will fetch image features from the net, no need for
+    # extra inputs.
+    return self._zero_label
+
+  def get_train_input(self, prev, i):
+    """See SequenceLayerBase.get_train_input for details."""
+    return self.get_eval_input(prev, i)
+
+  def unroll_cell(self, decoder_inputs, initial_state, loop_function, cell):
+    return tf.contrib.legacy_seq2seq.attention_decoder(
+        decoder_inputs=decoder_inputs,
+        initial_state=initial_state,
+        attention_states=self._net,
+        cell=cell,
+        loop_function=self.get_input)
+
+
+class AttentionWithAutoregression(Attention):
+  """A layer which uses both attention and auto regression."""
+
+  def __init__(self, *args, **kwargs):
+    super(AttentionWithAutoregression, self).__init__(*args, **kwargs)
+
+  def get_train_input(self, prev, i):
+    """See SequenceLayerBase.get_train_input for details."""
+    if i == 0:
+      return self._zero_label
+    else:
+      # TODO(gorban): update to gradually introduce gt labels.
+      return self._labels_one_hot[:, i - 1, :]
+
+  def get_eval_input(self, prev, i):
+    """See SequenceLayerBase.get_eval_input for details."""
+    if i == 0:
+      return self._zero_label
+    else:
+      logit = self.char_logit(prev, char_index=i - 1)
+      return self.char_one_hot(logit)
+
+
+def get_layer_class(use_attention, use_autoregression):
+  """A convenience function to get a layer class based on requirements.
+
+  Args:
+    use_attention: if True a returned class will use attention.
+    use_autoregression: if True a returned class will use auto regression.
+
+  Returns:
+    One of available sequence layers (child classes for SequenceLayerBase).
+  """
+  if use_attention and use_autoregression:
+    layer_class = AttentionWithAutoregression
+  elif use_attention and not use_autoregression:
+    layer_class = Attention
+  elif not use_attention and not use_autoregression:
+    layer_class = NetSlice
+  elif not use_attention and use_autoregression:
+    layer_class = NetSliceWithAutoregression
+  else:
+    raise AssertionError('Unsupported sequence layer class')
+
+  logging.debug('Use %s as a layer class', layer_class.__name__)
+  return layer_class
--- a/attention_ocr/python/sequence_layers_test.py
+++ b/attention_ocr/python/sequence_layers_test.py
+# Copyright 2017 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for sequence_layers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.contrib import slim
+
+import model
+import sequence_layers
+
+
+def fake_net(batch_size, num_features, feature_size):
+  return tf.convert_to_tensor(
+      np.random.uniform(size=(batch_size, num_features, feature_size)),
+      dtype=tf.float32)
+
+
+def fake_labels(batch_size, seq_length, num_char_classes):
+  labels_np = tf.convert_to_tensor(
+      np.random.randint(
+          low=0, high=num_char_classes, size=(batch_size, seq_length)))
+  return slim.one_hot_encoding(labels_np, num_classes=num_char_classes)
+
+
+def create_layer(layer_class, batch_size, seq_length, num_char_classes):
+  model_params = model.ModelParams(
+      num_char_classes=num_char_classes,
+      seq_length=seq_length,
+      num_views=1,
+      null_code=num_char_classes)
+  net = fake_net(
+      batch_size=batch_size, num_features=seq_length * 5, feature_size=6)
+  labels_one_hot = fake_labels(batch_size, seq_length, num_char_classes)
+  layer_params = sequence_layers.SequenceLayerParams(
+      num_lstm_units=10, weight_decay=0.00004, lstm_state_clip_value=10.0)
+  return layer_class(net, labels_one_hot, model_params, layer_params)
+
+
+class SequenceLayersTest(tf.test.TestCase):
+  def test_net_slice_char_logits_with_correct_shape(self):
+    batch_size = 2
+    seq_length = 4
+    num_char_classes = 3
+
+    layer = create_layer(sequence_layers.NetSlice, batch_size, seq_length,
+                         num_char_classes)
+    char_logits = layer.create_logits()
+
+    self.assertEqual(
+        tf.TensorShape([batch_size, seq_length, num_char_classes]),
+        char_logits.get_shape())
+
+  def test_net_slice_with_autoregression_char_logits_with_correct_shape(self):
+    batch_size = 2
+    seq_length = 4
+    num_char_classes = 3
+
+    layer = create_layer(sequence_layers.NetSliceWithAutoregression,
+                         batch_size, seq_length, num_char_classes)
+    char_logits = layer.create_logits()
+
+    self.assertEqual(
+        tf.TensorShape([batch_size, seq_length, num_char_classes]),
+        char_logits.get_shape())
+
+  def test_attention_char_logits_with_correct_shape(self):
+    batch_size = 2
+    seq_length = 4
+    num_char_classes = 3
+
+    layer = create_layer(sequence_layers.Attention, batch_size, seq_length,
+                         num_char_classes)
+    char_logits = layer.create_logits()
+
+    self.assertEqual(
+        tf.TensorShape([batch_size, seq_length, num_char_classes]),
+        char_logits.get_shape())
+
+  def test_attention_with_autoregression_char_logits_with_correct_shape(self):
+    batch_size = 2
+    seq_length = 4
+    num_char_classes = 3
+
+    layer = create_layer(sequence_layers.AttentionWithAutoregression,
+                         batch_size, seq_length, num_char_classes)
+    char_logits = layer.create_logits()
+
+    self.assertEqual(
+        tf.TensorShape([batch_size, seq_length, num_char_classes]),
+        char_logits.get_shape())
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/attention_ocr/python/train.py
+++ b/attention_ocr/python/train.py
+# Copyright 2017 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Script to train the Attention OCR model.
+
+A simple usage example:
+python train.py
+"""
+import collections
+import logging
+import tensorflow as tf
+from tensorflow.contrib import slim
+from tensorflow import app
+from tensorflow.python.platform import flags
+from tensorflow.contrib.tfprof import model_analyzer
+
+import data_provider
+import common_flags
+
+FLAGS = flags.FLAGS
+common_flags.define()
+
+# yapf: disable
+flags.DEFINE_integer('task', 0,
+                     'The Task ID. This value is used when training with '
+                     'multiple workers to identify each worker.')
+
+flags.DEFINE_integer('ps_tasks', 0,
+                     'The number of parameter servers. If the value is 0, then'
+                     ' the parameters are handled locally by the worker.')
+
+flags.DEFINE_integer('save_summaries_secs', 60,
+                     'The frequency with which summaries are saved, in '
+                     'seconds.')
+
+flags.DEFINE_integer('save_interval_secs', 600,
+                     'Frequency in seconds of saving the model.')
+
+flags.DEFINE_integer('max_number_of_steps', int(1e10),
+                     'The maximum number of gradient steps.')
+
+flags.DEFINE_string('checkpoint_inception', '',
+                    'Checkpoint to recover inception weights from.')
+
+flags.DEFINE_float('clip_gradient_norm', 2.0,
+                   'If greater than 0 then the gradients would be clipped by '
+                   'it.')
+
+flags.DEFINE_bool('sync_replicas', False,
+                  'If True will synchronize replicas during training.')
+
+flags.DEFINE_integer('replicas_to_aggregate', 1,
+                     'The number of gradients updates before updating params.')
+
+flags.DEFINE_integer('total_num_replicas', 1,
+                     'Total number of worker replicas.')
+
+flags.DEFINE_integer('startup_delay_steps', 15,
+                     'Number of training steps between replicas startup.')
+
+flags.DEFINE_boolean('reset_train_dir', False,
+                     'If true will delete all files in the train_log_dir')
+
+flags.DEFINE_boolean('show_graph_stats', False,
+                     'Output model size stats to stderr.')
+# yapf: enable
+
+TrainingHParams = collections.namedtuple('TrainingHParams', [
+    'learning_rate',
+    'optimizer',
+    'momentum',
+    'use_augment_input',
+])
+
+
+def get_training_hparams():
+  return TrainingHParams(
+      learning_rate=FLAGS.learning_rate,
+      optimizer=FLAGS.optimizer,
+      momentum=FLAGS.momentum,
+      use_augment_input=FLAGS.use_augment_input)
+
+
+def create_optimizer(hparams):
+  """Creates optimized based on the specified flags."""
+  if hparams.optimizer == 'momentum':
+    optimizer = tf.train.MomentumOptimizer(
+        hparams.learning_rate, momentum=hparams.momentum)
+  elif hparams.optimizer == 'adam':
+    optimizer = tf.train.AdamOptimizer(hparams.learning_rate)
+  elif hparams.optimizer == 'adadelta':
+    optimizer = tf.train.AdadeltaOptimizer(hparams.learning_rate)
+  elif hparams.optimizer == 'adagrad':
+    optimizer = tf.train.AdagradOptimizer(hparams.learning_rate)
+  elif hparams.optimizer == 'rmsprop':
+    optimizer = tf.train.RMSPropOptimizer(
+        hparams.learning_rate, momentum=hparams.momentum)
+  return optimizer
+
+
+def train(loss, init_fn, hparams):
+  """Wraps slim.learning.train to run a training loop.
+
+  Args:
+    loss: a loss tensor
+    init_fn: A callable to be executed after all other initialization is done.
+    hparams: a model hyper parameters
+  """
+  optimizer = create_optimizer(hparams)
+
+  # NOTE(review): the `startup_delay_steps` flag is not read in this function;
+  # both branches below pass 0 to slim.learning.train - confirm this is
+  # intended.
+  if FLAGS.sync_replicas:
+    # Wrap the base optimizer in a synchronizing optimizer configured from the
+    # replica related flags.
+    # NOTE(review): confirm that `tf.LegacySyncReplicasOptimizer` is exported
+    # by the targeted TensorFlow version.
+    replica_id = tf.constant(FLAGS.task, tf.int32, shape=())
+    optimizer = tf.LegacySyncReplicasOptimizer(
+        opt=optimizer,
+        replicas_to_aggregate=FLAGS.replicas_to_aggregate,
+        replica_id=replica_id,
+        total_num_replicas=FLAGS.total_num_replicas)
+    sync_optimizer = optimizer
+    startup_delay_steps = 0
+  else:
+    startup_delay_steps = 0
+    sync_optimizer = None
+
+  # Build the train op with gradient summaries enabled; the clipping norm comes
+  # from the clip_gradient_norm flag.
+  train_op = slim.learning.create_train_op(
+      loss,
+      optimizer,
+      summarize_gradients=True,
+      clip_gradient_norm=FLAGS.clip_gradient_norm)
+
+  # The worker with task id 0 is treated as the chief.
+  slim.learning.train(
+      train_op=train_op,
+      logdir=FLAGS.train_log_dir,
+      graph=loss.graph,
+      master=FLAGS.master,
+      is_chief=(FLAGS.task == 0),
+      number_of_steps=FLAGS.max_number_of_steps,
+      save_summaries_secs=FLAGS.save_summaries_secs,
+      save_interval_secs=FLAGS.save_interval_secs,
+      startup_delay_steps=startup_delay_steps,
+      sync_optimizer=sync_optimizer,
+      init_fn=init_fn)
+
+
+def prepare_training_dir():
+  if not tf.gfile.Exists(FLAGS.train_log_dir):
+    logging.info('Create a new training directory %s', FLAGS.train_log_dir)
+    tf.gfile.MakeDirs(FLAGS.train_log_dir)
+  else:
+    if FLAGS.reset_train_dir:
+      logging.info('Reset the training directory %s', FLAGS.train_log_dir)
+      tf.gfile.DeleteRecursively(FLAGS.train_log_dir)
+      tf.gfile.MakeDirs(FLAGS.train_log_dir)
+    else:
+      logging.info('Use already existing training directory %s',
+                   FLAGS.train_log_dir)
+
+
+def calculate_graph_metrics():
+  param_stats = model_analyzer.print_model_analysis(
+      tf.get_default_graph(),
+      tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
+  return param_stats.total_parameters
+
+
+def main(_):
+  """Builds the training graph from the flags and starts training.
+
+  Args:
+    _: unused positional argument.
+  """
+  prepare_training_dir()
+
+  # The model is configured from properties of the selected dataset split.
+  dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
+  model = common_flags.create_model(dataset.num_char_classes,
+                                    dataset.max_sequence_length,
+                                    dataset.num_of_views, dataset.null_code)
+  hparams = get_training_hparams()
+
+  # If ps_tasks is zero, the local device is used. When using multiple
+  # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
+  # across the different devices.
+  device_setter = tf.train.replica_device_setter(
+      FLAGS.ps_tasks, merge_devices=True)
+  with tf.device(device_setter):
+    # Input pipeline, model, loss and summaries are all created under the
+    # device setter.
+    data = data_provider.get_data(
+        dataset,
+        FLAGS.batch_size,
+        augment=hparams.use_augment_input,
+        central_crop_size=common_flags.get_crop_size())
+    endpoints = model.create_base(data.images, data.labels_one_hot)
+    total_loss = model.create_loss(data, endpoints)
+    model.create_summaries(data, endpoints, dataset.charset, is_training=True)
+    # init_fn is handed to train(), which forwards it to slim.learning.train.
+    init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint,
+                                              FLAGS.checkpoint_inception)
+    if FLAGS.show_graph_stats:
+      logging.info('Total number of weights in the graph: %s',
+                   calculate_graph_metrics())
+    train(total_loss, init_fn, hparams)
+
+
+# Script entry point. `app` is imported outside this excerpt; presumably
+# app.run() parses the flags and then invokes main() -- TODO confirm.
+if __name__ == '__main__':
+  app.run()
--- a/attention_ocr/python/utils.py
+++ b/attention_ocr/python/utils.py
+# Copyright 2017 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions to support building models for StreetView text transcription."""
+
+import tensorflow as tf
+from tensorflow.contrib import slim
+
+
+def logits_to_log_prob(logits):
+  """Computes log probabilities using numerically stable trick.
+
+  This uses two numerical stability tricks:
+  1) softmax(x) = softmax(x - c) where c is a constant applied to all
+  arguments. If we set c = max(x) then the softmax is more numerically
+  stable.
+  2) log softmax(x) is not numerically stable, but we can stabilize it
+  by using the identity log softmax(x) = x - log sum exp(x)
+
+  Args:
+    logits: Tensor of arbitrary shape whose last dimension contains logits.
+
+  Returns:
+    A tensor of the same shape as the input, but with corresponding log
+    probabilities.
+  """
+
+  with tf.variable_scope('log_probabilities'):
+    reduction_indices = len(logits.shape.as_list()) - 1
+    max_logits = tf.reduce_max(
+        logits, reduction_indices=reduction_indices, keep_dims=True)
+    safe_logits = tf.subtract(logits, max_logits)
+    sum_exp = tf.reduce_sum(
+        tf.exp(safe_logits),
+        reduction_indices=reduction_indices,
+        keep_dims=True)
+    log_probs = tf.subtract(safe_logits, tf.log(sum_exp))
+  return log_probs
+
+
+def variables_to_restore(scope=None, strip_scope=False):
+  """Returns a list of variables to restore for the specified list of methods.
+
+  It is supposed that variable name starts with the method's scope (a prefix
+  returned by _method_scope function).
+
+  Args:
+    methods_names: a list of names of configurable methods.
+    strip_scope: if True will return variable names without method's scope.
+      If methods_names is None will return names unchanged.
+    model_scope: a scope for a whole model.
+
+  Returns:
+    a dictionary mapping variable names to variables for restore.
+  """
+  if scope:
+    variable_map = {}
+    method_variables = slim.get_variables_to_restore(include=[scope])
+    for var in method_variables:
+      if strip_scope:
+        var_name = var.op.name[len(scope) + 1:]
+      else:
+        var_name = var.op.name
+      variable_map[var_name] = var
+
+    return variable_map
+  else:
+    return {v.op.name: v for v in slim.get_variables_to_restore()}
--- a/autoencoder/AdditiveGaussianNoiseAutoencoderRunner.py
+++ b/autoencoder/AdditiveGaussianNoiseAutoencoderRunner.py
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

-from autoencoder.autoencoder_models.DenoisingAutoencoder import AdditiveGaussianNoiseAutoencoder
+from autoencoder_models.DenoisingAutoencoder import AdditiveGaussianNoiseAutoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)

@@ -45,7 +45,6 @@ for epoch in range(training_epochs):

    # Display logs per epoch step
    if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
--- a/autoencoder/AutoencoderRunner.py
+++ b/autoencoder/AutoencoderRunner.py
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

-from autoencoder.autoencoder_models.Autoencoder import Autoencoder
+from autoencoder_models.Autoencoder import Autoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)

@@ -44,7 +44,6 @@ for epoch in range(training_epochs):

    # Display logs per epoch step
    if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
--- a/autoencoder/MaskingNoiseAutoencoderRunner.py
+++ b/autoencoder/MaskingNoiseAutoencoderRunner.py
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

-from autoencoder.autoencoder_models.DenoisingAutoencoder import MaskingNoiseAutoencoder
+from autoencoder_models.DenoisingAutoencoder import MaskingNoiseAutoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)

@@ -43,7 +43,6 @@ for epoch in range(training_epochs):
        avg_cost += cost / n_samples * batch_size

    if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
--- a/autoencoder/Utils.py
+++ b/autoencoder/Utils.py
-import numpy as np
-import tensorflow as tf
-
-def xavier_init(fan_in, fan_out, constant = 1):
-    low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
-    high = constant * np.sqrt(6.0 / (fan_in + fan_out))
-    return tf.random_uniform((fan_in, fan_out),
-                             minval = low, maxval = high,
-                             dtype = tf.float32)
--- a/autoencoder/VariationalAutoencoderRunner.py
+++ b/autoencoder/VariationalAutoencoderRunner.py
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

-from autoencoder.autoencoder_models.VariationalAutoencoder import VariationalAutoencoder
+from autoencoder_models.VariationalAutoencoder import VariationalAutoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)

@@ -47,7 +47,6 @@ for epoch in range(training_epochs):

    # Display logs per epoch step
    if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
--- a/autoencoder/autoencoder_models/Autoencoder.py
+++ b/autoencoder/autoencoder_models/Autoencoder.py
 import tensorflow as tf
-import numpy as np
-import autoencoder.Utils

 class Autoencoder(object):

@@ -18,7 +16,7 @@ class Autoencoder(object):
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])

        # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
@@ -28,7 +26,8 @@ class Autoencoder(object):

    def _initialize_weights(self):
        all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32))
@@ -46,7 +45,7 @@ class Autoencoder(object):

    def generate(self, hidden = None):
        if hidden is None:
-            hidden = np.random.normal(size=self.weights["b1"])
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
        return self.sess.run(self.reconstruction, feed_dict={self.hidden: hidden})

    def reconstruct(self, X):

--- a/autoencoder/autoencoder_models/DenoisingAutoencoder.py
+++ b/autoencoder/autoencoder_models/DenoisingAutoencoder.py
 import tensorflow as tf
-import numpy as np
-import autoencoder.Utils
-

 class AdditiveGaussianNoiseAutoencoder(object):
    def __init__(self, n_input, n_hidden, transfer_function = tf.nn.softplus, optimizer = tf.train.AdamOptimizer(),
@@ -22,7 +19,7 @@ class AdditiveGaussianNoiseAutoencoder(object):
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])

        # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
@@ -31,7 +28,8 @@ class AdditiveGaussianNoiseAutoencoder(object):

    def _initialize_weights(self):
        all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype = tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype = tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype = tf.float32))
@@ -53,9 +51,9 @@ class AdditiveGaussianNoiseAutoencoder(object):
                                                       self.scale: self.training_scale
                                                       })

-    def generate(self, hidden = None):
+    def generate(self, hidden=None):
        if hidden is None:
-            hidden = np.random.normal(size = self.weights["b1"])
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
        return self.sess.run(self.reconstruction, feed_dict = {self.hidden: hidden})

    def reconstruct(self, X):
@@ -89,7 +87,7 @@ class MaskingNoiseAutoencoder(object):
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])

        # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
@@ -98,7 +96,8 @@ class MaskingNoiseAutoencoder(object):

    def _initialize_weights(self):
        all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype = tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype = tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype = tf.float32))
@@ -115,9 +114,9 @@ class MaskingNoiseAutoencoder(object):
    def transform(self, X):
        return self.sess.run(self.hidden, feed_dict = {self.x: X, self.keep_prob: 1.0})

-    def generate(self, hidden = None):
+    def generate(self, hidden=None):
        if hidden is None:
-            hidden = np.random.normal(size = self.weights["b1"])
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
        return self.sess.run(self.reconstruction, feed_dict = {self.hidden: hidden})

    def reconstruct(self, X):

--- a/autoencoder/autoencoder_models/VariationalAutoencoder.py
+++ b/autoencoder/autoencoder_models/VariationalAutoencoder.py
 import tensorflow as tf
-import numpy as np
-import autoencoder.Utils

 class VariationalAutoencoder(object):

@@ -17,13 +15,13 @@ class VariationalAutoencoder(object):
        self.z_log_sigma_sq = tf.add(tf.matmul(self.x, self.weights['log_sigma_w1']), self.weights['log_sigma_b1'])

        # sample from gaussian distribution
-        eps = tf.random_normal(tf.pack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
-        self.z = tf.add(self.z_mean, tf.mul(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))
+        eps = tf.random_normal(tf.stack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
+        self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))

        self.reconstruction = tf.add(tf.matmul(self.z, self.weights['w2']), self.weights['b2'])

        # cost
-        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq
                                           - tf.square(self.z_mean)
                                           - tf.exp(self.z_log_sigma_sq), 1)
@@ -36,8 +34,10 @@ class VariationalAutoencoder(object):

    def _initialize_weights(self):
        all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
-        all_weights['log_sigma_w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+            initializer=tf.contrib.layers.xavier_initializer())
+        all_weights['log_sigma_w1'] = tf.get_variable("log_sigma_w1", shape=[self.n_input, self.n_hidden],
+            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['log_sigma_b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
@@ -56,8 +56,8 @@ class VariationalAutoencoder(object):

    def generate(self, hidden = None):
        if hidden is None:
-            hidden = np.random.normal(size=self.weights["b1"])
-        return self.sess.run(self.reconstruction, feed_dict={self.z_mean: hidden})
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
+        return self.sess.run(self.reconstruction, feed_dict={self.z: hidden})

    def reconstruct(self, X):
        return self.sess.run(self.reconstruction, feed_dict={self.x: X})

--- a/cognitive_mapping_and_planning/.gitignore
+++ b/cognitive_mapping_and_planning/.gitignore
+deps
+*.pyc
+lib*.so
+lib*.so*
--- a/cognitive_mapping_and_planning/README.md
+++ b/cognitive_mapping_and_planning/README.md
+# Cognitive Mapping and Planning for Visual Navigation
+**Saurabh Gupta, James Davidson, Sergey Levine, Rahul Sukthankar, Jitendra Malik**
+
+**Computer Vision and Pattern Recognition (CVPR) 2017.**
+
+**[ArXiv](https://arxiv.org/abs/1702.03920), 
+[Project Website](https://sites.google.com/corp/view/cognitive-mapping-and-planning/)**
+
+### Citing
+If you find this code base and models useful in your research, please consider
+citing the following paper:
+  ```
+  @inproceedings{gupta2017cognitive,
+    title={Cognitive Mapping and Planning for Visual Navigation},
+    author={Gupta, Saurabh and Davidson, James and Levine, Sergey and
+      Sukthankar, Rahul and Malik, Jitendra},
+    booktitle={CVPR},
+    year={2017}
+  }
+  ```
+
+### Contents
+1.  [Requirements: software](#requirements-software)
+2.  [Requirements: data](#requirements-data)
+3.  [Test Pre-trained Models](#test-pre-trained-models)
+4.  [Train your Own Models](#train-your-own-models)
+
+### Requirements: software
+1.  Python Virtual Env Setup: All code is implemented in Python but depends on a
+    small number of python packages and a couple of C libraries. We recommend
+    using virtual environment for installing these python packages and python
+    bindings for these C libraries.
+      ```Shell
+      VENV_DIR=venv
+      pip install virtualenv
+      virtualenv $VENV_DIR
+      source $VENV_DIR/bin/activate
+      
+      # You may need to upgrade pip for installing opencv-python.
+      pip install --upgrade pip
+      # Install simple dependencies.
+      pip install -r requirements.txt
+
+      # Patch bugs in dependencies.
+      sh patches/apply_patches.sh
+      ```
+
+2.  Install [Tensorflow](https://www.tensorflow.org/) inside this virtual
+    environment. Typically done with `pip install --upgrade tensorflow-gpu`.
+
+3.  Swiftshader: We use
+    [Swiftshader](https://github.com/google/swiftshader.git), a CPU based
+    renderer to render the meshes.  It is possible to use other renderers,
+    replace `SwiftshaderRenderer` in `render/swiftshader_renderer.py` with
+    bindings to your renderer. 
+    ```Shell
+    mkdir -p deps
+    git clone --recursive https://github.com/google/swiftshader.git deps/swiftshader-src
+    cd deps/swiftshader-src && git checkout 91da6b00584afd7dcaed66da88e2b617429b3950
+    mkdir build && cd build && cmake .. && make -j 16 libEGL libGLESv2
+    cd ../../../
+    cp deps/swiftshader-src/build/libEGL* libEGL.so.1
+    cp deps/swiftshader-src/build/libGLESv2* libGLESv2.so.2
+    ```
+
+4.  PyAssimp: We use [PyAssimp](https://github.com/assimp/assimp.git) to load
+    meshes.  It is possible to use other libraries to load meshes, replace
+    `Shape` in `render/swiftshader_renderer.py` with bindings to your library for
+    loading meshes. 
+    ```Shell
+    mkdir -p deps
+    git clone https://github.com/assimp/assimp.git deps/assimp-src
+    cd deps/assimp-src
+    git checkout 2afeddd5cb63d14bc77b53740b38a54a97d94ee8
+    cmake CMakeLists.txt -G 'Unix Makefiles' && make -j 16
+    cd port/PyAssimp && python setup.py install
+    cd ../../../..
+    cp deps/assimp-src/lib/libassimp* .
+    ```
+
+5.  graph-tool: We use [graph-tool](https://git.skewed.de/count0/graph-tool)
+    library for graph processing.
+    ```Shell
+    mkdir -p deps
+    # If the following git clone command fails, you can also download the source
+    # from https://downloads.skewed.de/graph-tool/graph-tool-2.2.44.tar.bz2
+    git clone https://git.skewed.de/count0/graph-tool deps/graph-tool-src
+    cd deps/graph-tool-src && git checkout 178add3a571feb6666f4f119027705d95d2951ab
+    bash autogen.sh
+    ./configure --disable-cairo --disable-sparsehash --prefix=$HOME/.local
+    make -j 16
+    make install
+    cd ../../
+    ```
+
+### Requirements: data
+1.  Download the Stanford 3D Indoor Spaces Dataset (S3DIS Dataset) and ImageNet
+    Pre-trained models for initializing different models. Follow instructions in
+    `data/README.md`
+
+### Test Pre-trained Models
+1.  Download pre-trained models using
+    `scripts/scripts_download_pretrained_models.sh`
+
+2.  Test models using `scripts/script_test_pretrained_models.sh`.
+
+### Train Your Own Models
+All models were trained asynchronously with 16 workers, each worker using data
+from a single floor. The default hyper-parameters correspond to this setting.
+See [distributed training with
+Tensorflow](https://www.tensorflow.org/deploy/distributed) for setting up
+distributed training. Training with a single worker is possible with the current
+code base but will require some minor changes to allow each worker to load all
+training environments.
+
+### Contact
+For questions or issues open an issue on the tensorflow/models [issues
+tracker](https://github.com/tensorflow/models/issues). Please assign issues to
+@s-gupta.
+
+### Credits
+This code was written by Saurabh Gupta (@s-gupta).
--- a/cognitive_mapping_and_planning/__init__.py
+++ b/cognitive_mapping_and_planning/__init__.py
--- a/cognitive_mapping_and_planning/cfgs/__init__.py
+++ b/cognitive_mapping_and_planning/cfgs/__init__.py
--- a/cognitive_mapping_and_planning/cfgs/config_cmp.py
+++ b/cognitive_mapping_and_planning/cfgs/config_cmp.py
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os, sys
+import numpy as np
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+import logging
+import src.utils as utils
+import cfgs.config_common as cc
+
+
+import tensorflow as tf
+
+
+rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169'
+d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002'
+
+def get_default_args():
+  summary_args = utils.Foo(display_interval=1, test_iters=26,
+                           arop_full_summary_iters=14)
+
+  control_args = utils.Foo(train=False, test=False,
+                           force_batchnorm_is_training_at_test=False,
+                           reset_rng_seed=False, only_eval_when_done=False,
+                           test_mode=None)
+  return summary_args, control_args
+
+def get_default_cmp_args():
+  batch_norm_param = {'center': True, 'scale': True,
+                      'activation_fn':tf.nn.relu}
+
+  mapper_arch_args = utils.Foo(
+      dim_reduce_neurons=64,
+      fc_neurons=[1024, 1024],
+      fc_out_size=8,
+      fc_out_neurons=64,
+      encoder='resnet_v2_50',
+      deconv_neurons=[64, 32, 16, 8, 4, 2],
+      deconv_strides=[2, 2, 2, 2, 2, 2],
+      deconv_layers_per_block=2,
+      deconv_kernel_size=4,
+      fc_dropout=0.5,
+      combine_type='wt_avg_logits',
+      batch_norm_param=batch_norm_param)
+
+  readout_maps_arch_args = utils.Foo(
+      num_neurons=[],
+      strides=[],
+      kernel_size=None,
+      layers_per_block=None)
+
+  arch_args = utils.Foo(
+      vin_val_neurons=8, vin_action_neurons=8, vin_ks=3, vin_share_wts=False,
+      pred_neurons=[64, 64], pred_batch_norm_param=batch_norm_param,
+      conv_on_value_map=0, fr_neurons=16, fr_ver='v2', fr_inside_neurons=64,
+      fr_stride=1, crop_remove_each=30, value_crop_size=4,
+      action_sample_type='sample', action_sample_combine_type='one_or_other',
+      sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True,
+      vin_num_iters=36, isd_k=750., use_agent_loc=False, multi_scale=True,
+      readout_maps=False, rom_arch=readout_maps_arch_args)
+
+  return arch_args, mapper_arch_args
+
+def get_arch_vars(arch_str):
+  if arch_str == '': vals = []
+  else: vals = arch_str.split('_')
+  ks = ['var1', 'var2', 'var3']
+  ks = ks[:len(vals)]
+  
+  # Exp Ver.
+  if len(vals) == 0: ks.append('var1'); vals.append('v0')
+  # custom arch.
+  if len(vals) == 1: ks.append('var2'); vals.append('')
+  # map scape for projection baseline.
+  if len(vals) == 2: ks.append('var3'); vals.append('fr2')
+
+  assert(len(vals) == 3)
+
+  vars = utils.Foo()
+  for k, v in zip(ks, vals):
+    setattr(vars, k, v)
+
+  logging.error('arch_vars: %s', vars)
+  return vars
+
+def process_arch_str(args, arch_str):
+  # This function modifies args.
+  args.arch, args.mapper_arch = get_default_cmp_args()
+
+  arch_vars = get_arch_vars(arch_str)
+
+  args.navtask.task_params.outputs.ego_maps = True
+  args.navtask.task_params.outputs.ego_goal_imgs = True
+  args.navtask.task_params.outputs.egomotion = True
+  args.navtask.task_params.toy_problem = False
+
+  if arch_vars.var1 == 'lmap':
+    args = process_arch_learned_map(args, arch_vars)
+
+  elif arch_vars.var1 == 'pmap':
+    args = process_arch_projected_map(args, arch_vars)
+
+  else:
+    logging.fatal('arch_vars.var1 should be lmap or pmap, but is %s', arch_vars.var1)
+    assert(False)
+
+  return args
+
+def process_arch_learned_map(args, arch_vars):
+  # Multiscale vision based system.
+  args.navtask.task_params.input_type = 'vision'
+  args.navtask.task_params.outputs.images = True
+  
+  if args.navtask.camera_param.modalities[0] == 'rgb':
+    args.solver.pretrained_path = rgb_resnet_v2_50_path
+  elif args.navtask.camera_param.modalities[0] == 'depth':
+    args.solver.pretrained_path = d_resnet_v2_50_path
+
+  if arch_vars.var2 == 'Ssc':
+    sc = 1./args.navtask.task_params.step_size
+    args.arch.vin_num_iters = 40
+    args.navtask.task_params.map_scales = [sc]
+    max_dist = args.navtask.task_params.max_dist * \
+        args.navtask.task_params.num_goals
+    args.navtask.task_params.map_crop_sizes = [2*max_dist]
+
+    args.arch.fr_stride = 1
+    args.arch.vin_action_neurons = 8
+    args.arch.vin_val_neurons = 3
+    args.arch.fr_inside_neurons = 32
+
+    args.mapper_arch.pad_map_with_zeros_each = [24]
+    args.mapper_arch.deconv_neurons = [64, 32, 16]
+    args.mapper_arch.deconv_strides = [1, 2, 1]
+
+  elif (arch_vars.var2 == 'Msc' or arch_vars.var2 == 'MscROMms' or
+        arch_vars.var2 == 'MscROMss' or arch_vars.var2 == 'MscNoVin'):
+    # Code for multi-scale planner.
+    args.arch.vin_num_iters = 8
+    args.arch.crop_remove_each = 4
+    args.arch.value_crop_size = 8
+
+    sc = 1./args.navtask.task_params.step_size
+    max_dist = args.navtask.task_params.max_dist * \
+        args.navtask.task_params.num_goals
+    n_scales = np.log2(float(max_dist) / float(args.arch.vin_num_iters))
+    n_scales = int(np.ceil(n_scales)+1)
+
+    args.navtask.task_params.map_scales = \
+        list(sc*(0.5**(np.arange(n_scales))[::-1]))
+    args.navtask.task_params.map_crop_sizes = [16 for x in range(n_scales)]
+
+    args.arch.fr_stride = 1
+    args.arch.vin_action_neurons = 8
+    args.arch.vin_val_neurons = 3
+    args.arch.fr_inside_neurons = 32
+
+    args.mapper_arch.pad_map_with_zeros_each = [0 for _ in range(n_scales)]
+    args.mapper_arch.deconv_neurons = [64*n_scales, 32*n_scales, 16*n_scales]
+    args.mapper_arch.deconv_strides = [1, 2, 1]
+
+    if arch_vars.var2 == 'MscNoVin':
+      # No planning version.
+      args.arch.fr_stride = [1, 2, 1, 2]
+      args.arch.vin_action_neurons = None
+      args.arch.vin_val_neurons = 16
+      args.arch.fr_inside_neurons = 32
+
+      args.arch.crop_remove_each = 0
+      args.arch.value_crop_size = 4
+      args.arch.vin_num_iters = 0
+
+    elif arch_vars.var2 == 'MscROMms' or arch_vars.var2 == 'MscROMss':
+      # Code with read outs, MscROMms flattens and reads out,
+      # MscROMss does not flatten and produces output at multiple scales.
+      args.navtask.task_params.outputs.readout_maps = True
+      args.navtask.task_params.map_resize_method = 'antialiasing'
+      args.arch.readout_maps = True
+
+      if arch_vars.var2 == 'MscROMms':
+        args.arch.rom_arch.num_neurons = [64, 1]
+        args.arch.rom_arch.kernel_size = 4
+        args.arch.rom_arch.strides = [2,2]
+        args.arch.rom_arch.layers_per_block = 2
+
+        args.navtask.task_params.readout_maps_crop_sizes = [64]
+        args.navtask.task_params.readout_maps_scales = [sc]
+
+      elif arch_vars.var2 == 'MscROMss':
+        args.arch.rom_arch.num_neurons = \
+            [64, len(args.navtask.task_params.map_scales)]
+        args.arch.rom_arch.kernel_size = 4
+        args.arch.rom_arch.strides = [1,1]
+        args.arch.rom_arch.layers_per_block = 1
+
+        args.navtask.task_params.readout_maps_crop_sizes = \
+            args.navtask.task_params.map_crop_sizes
+        args.navtask.task_params.readout_maps_scales = \
+            args.navtask.task_params.map_scales
+
+  else:
+    logging.fatal('arch_vars.var2 not one of Msc, MscROMms, MscROMss, MscNoVin.')
+    assert(False)
+
+  map_channels = args.mapper_arch.deconv_neurons[-1] / \
+    (2*len(args.navtask.task_params.map_scales))
+  args.navtask.task_params.map_channels = map_channels
+  
+  return args
+
+def process_arch_projected_map(args, arch_vars):
+  """Configures args for the single scale, projected-map architecture.
+
+  This vision based system does not use a mapper; it uses an analytically
+  estimated map instead.
+
+  Args:
+    args: configuration object; args.navtask, args.arch and args.solver are
+      modified in place. The first task modality must already be 'depth'.
+    arch_vars: parsed architecture variables. The third character of
+      arch_vars.var3 is read as the integer factor ds, and arch_vars.var2 is
+      stored as the non-linearity of the analytical counts.
+
+  Returns:
+    The args object that was passed in.
+  """
+  ds = int(arch_vars.var3[2])
+  task_params = args.navtask.task_params
+
+  task_params.input_type = 'analytical_counts'
+  task_params.outputs.analytical_counts = True
+
+  assert(task_params.modalities[0] == 'depth')
+  args.navtask.camera_param.img_channels = None
+
+  # Map size and scale both follow from ds.
+  map_size = 512/ds
+  counts_cfg = utils.Foo(map_sizes=[map_size],
+                         xy_resolution=[5.*ds],
+                         z_bins=[[-10, 10, 150, 200]],
+                         non_linearity=[arch_vars.var2])
+  task_params.analytical_counts = counts_cfg
+
+  args.arch.vin_num_iters = 36
+  task_params.map_scales = [1./ds]
+  task_params.map_crop_sizes = [map_size]
+
+  arch = args.arch
+  arch.fr_stride = [1,2]
+  arch.vin_action_neurons = 8
+  arch.vin_val_neurons = 3
+  arch.fr_inside_neurons = 32
+
+  # One more channel than there are entries in the first z_bins list.
+  task_params.map_channels = len(counts_cfg.z_bins[0]) + 1
+  args.solver.freeze_conv = False
+
+  return args
+
+def get_args_for_config(config_name):
+  """Builds the complete argument structure for a config name.
+
+  Args:
+    config_name: string of the form '<arch>.<solver>.<navtask>+<mode>_<imset>'.
+      It must contain exactly one '+', the part before it exactly two '.',
+      and the part after it exactly one '_'; otherwise unpacking raises
+      ValueError.
+
+  Returns:
+    utils.Foo with summary, control, solver, navtask and arch populated.
+  """
+  args = utils.Foo()
+  summary, control = get_default_args()
+  args.summary = summary
+  args.control = control
+
+  exp_name, mode_str = config_name.split('+')
+  arch_str, solver_str, navtask_str = exp_name.split('.')
+  for fmt, value in (('config_name: %s', config_name),
+                     ('arch_str: %s', arch_str),
+                     ('navtask_str: %s', navtask_str),
+                     ('solver_str: %s', solver_str),
+                     ('mode_str: %s', mode_str)):
+    logging.error(fmt, value)
+
+  # Solver and navigation task come from the shared config helpers; the
+  # architecture is processed last because it receives the partially filled
+  # args.
+  args.solver = cc.process_solver_str(solver_str)
+  args.navtask = cc.process_navtask_str(navtask_str)
+  args = process_arch_str(args, arch_str)
+  args.arch.isd_k = args.solver.isd_k
+
+  # Train, test, etc.
+  mode, imset = mode_str.split('_')
+  args = cc.adjust_args_for_mode(args, mode)
+  args.navtask.building_names = args.navtask.dataset.get_split(imset)
+  args.control.test_name = '{:s}_on_{:s}'.format(mode, imset)
+
+  # Log the arguments
+  logging.error('%s', args)
+  return args
--- a/cognitive_mapping_and_planning/cfgs/config_common.py
+++ b/cognitive_mapping_and_planning/cfgs/config_common.py
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+import numpy as np
+import logging
+import src.utils as utils
+import datasets.nav_env_config as nec
+from datasets import factory
+
+def adjust_args_for_mode(args, mode):
+  """Switches args into the run mode named by mode.
+
+  Args:
+    args: configuration object; its control, arch, navtask and summary
+      members are modified in place, depending on the mode.
+    mode: one of 'train', 'val1', 'val2' or 'bench'.
+
+  Returns:
+    The args object that was passed in.
+
+  Any other mode is reported with logging.fatal and then fails an assert.
+  """
+  def _evaluate_without_sampling():
+    # Shared by 'val2' and 'bench': no data augmentation, not sampling but
+    # taking the argmax action, not sampling from the ground truth at all.
+    args.control.test = True
+    args.arch.action_sample_type = 'argmax'
+    args.arch.sample_gt_prob_type = 'zero'
+    args.navtask.task_params.data_augment = \
+      utils.Foo(lr_flip=0, delta_angle=0, delta_xy=0, relight=False,
+                relight_fast=False, structured=False)
+
+  if mode == 'train':
+    args.control.train = True
+    return args
+
+  if mode == 'val1':
+    # Same settings as for training, to make sure nothing wonky is happening
+    # there.
+    args.control.test = True
+    args.control.test_mode = 'val'
+    args.navtask.task_params.batch_size = 32
+    return args
+
+  if mode == 'val2':
+    _evaluate_without_sampling()
+    args.control.test_mode = 'val'
+    args.navtask.task_params.batch_size = 32
+    return args
+
+  if mode == 'bench':
+    # Actually testing the agent in settings that are kept same between
+    # different runs.
+    args.navtask.task_params.batch_size = 16
+    _evaluate_without_sampling()
+    args.summary.test_iters = 250
+    args.control.only_eval_when_done = True
+    args.control.reset_rng_seed = True
+    args.control.test_mode = 'test'
+    return args
+
+  logging.fatal('Unknown mode: %s.', mode)
+  assert(False)
+  return args
+
+def get_solver_vars(solver_str):
+  """Parses an underscore separated solver string into named fields.
+
+  Fields are positional, in the order clip, dlw, long, typ, rlw, isdk,
+  adam_eps, init_lr. Trailing fields that are not supplied take their default
+  value.
+
+  Args:
+    solver_str: '' (all defaults) or up to eight '_'-separated field values.
+
+  Returns:
+    utils.Foo with one string attribute per field.
+
+  Raises:
+    AssertionError: if more than eight fields are supplied.
+  """
+  # Single ordered table of (name, default). Supplied values and defaults
+  # must use the same ordering; in particular 'rlw' sits between 'typ' and
+  # 'isdk', otherwise a supplied fifth field would be stored under the wrong
+  # name and 'rlw' would never be set.
+  fields = [
+      ('clip', 'noclip'),        # Gradient clipping or not.
+      ('dlw', 'dlw20'),          # Data loss weight.
+      ('long', 'nolong'),        # How long to train for.
+      ('typ', 'adam2'),          # Adam.
+      ('rlw', 'rlw1'),           # Reg loss wt.
+      ('isdk', 'isdk415'),       # isd_k: 415, inflexion at 2.5k.
+      ('adam_eps', 'aeps1en8'),  # Adam eps.
+      ('init_lr', 'lr1en3'),     # Init lr.
+  ]
+
+  if solver_str == '':
+    vals = []
+  else:
+    vals = solver_str.split('_')
+
+  # Fill every field that was not supplied with its default.
+  vals.extend([default for _, default in fields[len(vals):]])
+  assert(len(vals) == len(fields))
+
+  solver_vars = utils.Foo()
+  for (name, _), val in zip(fields, vals):
+    setattr(solver_vars, name, val)
+  logging.error('solver_vars: %s', solver_vars)
+  return solver_vars
+
+def process_solver_str(solver_str):
+  """Creates the solver configuration encoded by solver_str.
+
+  Args:
+    solver_str: underscore separated solver description, parsed by
+      get_solver_vars.
+
+  Returns:
+    utils.Foo holding the solver settings. Loss weights, Adam epsilon,
+    initial learning rate, isd_k, the step schedule, gradient clipping and
+    the optimizer preset are taken from solver_str; the remaining fields keep
+    the defaults set below.
+
+  An unrecognised schedule, clip or optimizer field is reported with
+  logging.fatal and then fails an assert.
+  """
+  solver = utils.Foo(
+      seed=0, learning_rate_decay=None, clip_gradient_norm=None, max_steps=None,
+      initial_learning_rate=None, momentum=None, steps_per_decay=None,
+      logdir=None, sync=False, adjust_lr_sync=True, wt_decay=0.0001,
+      data_loss_wt=None, reg_loss_wt=None, freeze_conv=True, num_workers=1,
+      task=0, ps_tasks=0, master='local', typ=None, momentum2=None,
+      adam_eps=None)
+
+  # Clobber with overrides from solver str.
+  solver_vars = get_solver_vars(solver_str)
+
+  def _to_float(token, prefix_len, signed=False):
+    # Drops the field prefix; 'x' stands for the decimal point and, for
+    # signed fields, 'n' stands for a minus sign.
+    text = token[prefix_len:].replace('x', '.')
+    if signed:
+      text = text.replace('n', '-')
+    return float(text)
+
+  solver.data_loss_wt = _to_float(solver_vars.dlw, 3)
+  solver.adam_eps = _to_float(solver_vars.adam_eps, 4, signed=True)
+  solver.initial_learning_rate = _to_float(solver_vars.init_lr, 2, signed=True)
+  solver.reg_loss_wt = _to_float(solver_vars.rlw, 3)
+  solver.isd_k = _to_float(solver_vars.isdk, 4)
+
+  # Training length: (steps_per_decay, max_steps).
+  schedules = {
+      'long': (40000, 120000),
+      'long2': (80000, 120000),
+      'nolong': (20000, 60000),
+      'nol': (20000, 60000),
+  }
+  schedule = solver_vars.long
+  if schedule in schedules:
+    solver.steps_per_decay, solver.max_steps = schedules[schedule]
+  else:
+    logging.fatal('solver_vars.long should be long, long2, nolong or nol.')
+    assert(False)
+
+  # Gradient clipping: either disabled or 'clip<norm>'.
+  clip = solver_vars.clip
+  if clip in ('noclip', 'nocl'):
+    solver.clip_gradient_norm = 0
+  elif clip.startswith('clip'):
+    solver.clip_gradient_norm = _to_float(clip, 4)
+  else:
+    logging.fatal('Unknown solver_vars.clip: %s', clip)
+    assert(False)
+
+  # Optimizer presets: (typ, momentum, momentum2, learning_rate_decay).
+  presets = {
+      'adam': ('adam', 0.9, 0.999, 1.0),
+      'adam2': ('adam', 0.9, 0.999, 0.1),
+      'sgd': ('sgd', 0.99, None, 0.1),
+  }
+  typ = solver_vars.typ
+  if typ in presets:
+    (solver.typ, solver.momentum, solver.momentum2,
+     solver.learning_rate_decay) = presets[typ]
+  else:
+    logging.fatal('Unknown solver_vars.typ: %s', typ)
+    assert(False)
+
+  logging.error('solver: %s', solver)
+  return solver
+
+def get_navtask_vars(navtask_str):
+  """Parses an underscore separated navigation task string into named fields.
+
+  Fields are positional, in the order listed in field_defaults below.
+  Trailing fields that are not supplied take their default value.
+
+  Args:
+    navtask_str: '' (all defaults) or up to ten '_'-separated field values.
+
+  Returns:
+    utils.Foo with one string attribute per field.
+  """
+  field_defaults = [
+      ('dataset_name', 'sbpd'),  # All data or not.
+      ('modality', 'rgb'),       # Modality.
+      ('task', 'r2r'),           # Semantic task?
+      ('history', 'h0'),         # Number of history frames.
+      ('max_dist', '32'),        # Max steps.
+      ('num_steps', '40'),       # Num steps.
+      ('step_size', '8'),        # Step size.
+      ('n_ori', '4'),            # n_ori.
+      ('aux_views', 'nv0'),      # Auxiliary views.
+      # Normal data augmentation as opposed to structured data augmentation
+      # (if set to straug).
+      ('data_aug', 'straug'),
+  ]
+  ks_all = [name for name, _ in field_defaults]
+
+  if navtask_str == '':
+    vals = []
+  else:
+    vals = navtask_str.split('_')
+  ks = ks_all[:len(vals)]
+
+  # Append name and default for every field that was not supplied.
+  for name, default in field_defaults[len(vals):]:
+    ks.append(name)
+    vals.append(default)
+
+  assert(len(vals) == 10)
+  assert(ks == ks_all)
+
+  navtask_vars = utils.Foo()
+  for k, v in zip(ks, vals):
+    setattr(navtask_vars, k, v)
+  logging.error('navtask_vars: %s', vals)
+  return navtask_vars
+
+def process_navtask_str(navtask_str):
+  """Creates the navigation task configuration encoded by navtask_str.
+
+  Starts from nec.nav_env_base_config() and overrides task and camera
+  parameters with the fields parsed by get_navtask_vars.
+
+  Args:
+    navtask_str: underscore separated navigation task description.
+
+  Returns:
+    The navtask configuration, with its dataset attribute set from
+    factory.get_dataset.
+
+  An unrecognised data_aug or task field is reported with logging.fatal and
+  then fails an assert. A modality other than 'rgb' or 'd' leaves the camera
+  modalities and channels of the base config untouched.
+  """
+  navtask = nec.nav_env_base_config()
+
+  # Clobber with overrides from strings.
+  navtask_vars = get_navtask_vars(navtask_str)
+  task_params = navtask.task_params
+
+  # Plain integer fields are copied over under the same name.
+  for field in ('n_ori', 'max_dist', 'num_steps', 'step_size'):
+    setattr(task_params, field, int(getattr(navtask_vars, field)))
+  task_params.data_augment.delta_xy = int(navtask_vars.step_size)/2.
+
+  # Auxiliary views: multiples +1..+n followed by -1..-n of the camera
+  # field of view (converted from degrees to radians).
+  n_aux_views_each = int(navtask_vars.aux_views[2])
+  multiples = np.arange(n_aux_views_each) + 1
+  aux_delta_thetas = np.concatenate((multiples, -multiples))
+  aux_delta_thetas = aux_delta_thetas*np.deg2rad(navtask.camera_param.fov)
+  task_params.aux_delta_thetas = aux_delta_thetas
+
+  structured_by_name = {'aug': False, 'straug': True}
+  if navtask_vars.data_aug in structured_by_name:
+    task_params.data_augment.structured = \
+        structured_by_name[navtask_vars.data_aug]
+  else:
+    logging.fatal('Unknown navtask_vars.data_aug %s.', navtask_vars.data_aug)
+    assert(False)
+
+  task_params.num_history_frames = int(navtask_vars.history[1:])
+  task_params.n_views = 1+task_params.num_history_frames
+
+  # Default goal channels; the semantic task below overrides this.
+  task_params.goal_channels = int(navtask_vars.n_ori)
+
+  task = navtask_vars.task
+  if task == 'hard':
+    task_params.type = 'rng_rejection_sampling_many'
+    task_params.rejection_sampling_M = 2000
+    task_params.min_dist = 10
+  elif task == 'r2r':
+    task_params.type = 'room_to_room_many'
+  elif task == 'ST':
+    # Semantic task at hand.
+    n_classes = len(task_params.semantic_task.class_map_names)
+    task_params.goal_channels = n_classes
+    task_params.rel_goal_loc_dim = n_classes
+    task_params.type = 'to_nearest_obj_acc'
+  else:
+    logging.fatal('navtask_vars.task: should be hard or r2r, ST')
+    assert(False)
+
+  # (modalities, img_channels) per modality field.
+  camera_by_modality = {'rgb': (['rgb'], 3), 'd': (['depth'], 2)}
+  if navtask_vars.modality in camera_by_modality:
+    (navtask.camera_param.modalities,
+     navtask.camera_param.img_channels) = \
+        camera_by_modality[navtask_vars.modality]
+
+  # Mirror the camera settings into the task parameters.
+  camera = navtask.camera_param
+  for dst, src in (('img_height', 'height'),
+                   ('img_width', 'width'),
+                   ('modalities', 'modalities'),
+                   ('img_channels', 'img_channels'),
+                   ('img_fov', 'fov')):
+    setattr(task_params, dst, getattr(camera, src))
+
+  navtask.dataset = factory.get_dataset(navtask_vars.dataset_name)
+  return navtask
--- a/cognitive_mapping_and_planning/cfgs/config_distill.py
+++ b/cognitive_mapping_and_planning/cfgs/config_distill.py
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import pprint
+import copy
+import os
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+import logging
+import src.utils as utils
+import cfgs.config_common as cc
+
+
+import tensorflow as tf
+
+rgb_resnet_v2_50_path = 'cache/resnet_v2_50_inception_preprocessed/model.ckpt-5136169'
+
+def get_default_args():
+  """Returns the default arguments for this configuration.
+
+  Returns:
+    utils.Foo with members solver, summary, control, arch and buildinger;
+    buildinger in turn groups the robot, task, environment and camera
+    settings.
+  """
+  # Settings grouped under 'buildinger'.
+  robot_cfg = utils.Foo(radius=15, base=10, height=140, sensor_height=120,
+                        camera_elevation_degree=-15)
+  camera_cfg = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0,
+                         fov=60., modalities=['rgb', 'depth'])
+  env_cfg = utils.Foo(padding=10, resolution=5, num_point_threshold=2,
+                      valid_min=-10, valid_max=200, n_samples_per_face=200)
+  augment_cfg = utils.Foo(lr_flip=0, delta_angle=1, delta_xy=4, relight=False,
+                          relight_fast=False, structured=False)
+  task_cfg = utils.Foo(num_actions=4, step_size=4, num_steps=0,
+                       batch_size=32, room_seed=0, base_class='Building',
+                       task='mapping', n_ori=6, data_augment=augment_cfg,
+                       output_transform_to_global_map=False,
+                       output_canonical_map=False,
+                       output_incremental_transform=False,
+                       output_free_space=False, move_type='shortest_path',
+                       toy_problem=0)
+  buildinger_cfg = utils.Foo(
+      building_names=['area1_gates_wingA_floor1_westpart'],
+      env_class=None, robot=robot_cfg,
+      task_params=task_cfg, env=env_cfg,
+      camera_param=camera_cfg)
+
+  # Optimisation settings.
+  solver_cfg = utils.Foo(seed=0, learning_rate_decay=0.1,
+                         clip_gradient_norm=0, max_steps=120000,
+                         initial_learning_rate=0.001, momentum=0.99,
+                         steps_per_decay=40000, logdir=None, sync=False,
+                         adjust_lr_sync=True, wt_decay=0.0001,
+                         data_loss_wt=1.0, reg_loss_wt=1.0,
+                         num_workers=1, task=0, ps_tasks=0, master='local')
+
+  # Summary, run control and encoder choices.
+  summary_cfg = utils.Foo(display_interval=1, test_iters=100)
+  control_cfg = utils.Foo(train=False, test=False,
+                          force_batchnorm_is_training_at_test=False)
+  arch_cfg = utils.Foo(rgb_encoder='resnet_v2_50', d_encoder='resnet_v2_50')
+
+  return utils.Foo(solver=solver_cfg,
+                   summary=summary_cfg, control=control_cfg, arch=arch_cfg,
+                   buildinger=buildinger_cfg)
+
+def get_vars(config_name):
+  """Splits config_name on '_' and fills in missing trailing fields.
+
+  Args:
+    config_name: '<version>[_<all|noall>[_<n_ori>]]'.
+
+  Returns:
+    List of string fields. With one field given, 'noall' and '4' are
+    appended; with two, only '4'; with three or more, nothing is appended.
+  """
+  # Defaults for the second field (all data or not) and the third (n_ori).
+  trailing_defaults = ['noall', '4']
+  fields = config_name.split('_')
+  # split() always yields at least one field, so the slice start is >= 0.
+  fields.extend(trailing_defaults[len(fields) - 1:])
+  logging.error('vars: %s', fields)
+  return fields
+
+def get_args_for_config(config_name):
+  """Builds the arguments for a '<config>+<mode>' name.
+
+  Args:
+    config_name: string with exactly one '+'. The part before it is parsed by
+      get_vars into [version, all|noall, n_ori]; the part after it is the
+      mode handed to the config_common helper.
+
+  Returns:
+    The argument structure from get_default_args with the overrides below
+    applied.
+
+  Raises:
+    ValueError: if config_name does not contain exactly one '+', or if the
+      n_ori field is not an integer.
+  """
+  args = get_default_args()
+  config_name, mode = config_name.split('+')
+  config_vars = get_vars(config_name)
+
+  logging.info('config_name: %s, mode: %s', config_name, mode)
+
+  task_params = args.buildinger.task_params
+  camera_param = args.buildinger.camera_param
+
+  task_params.n_ori = int(config_vars[2])
+  args.solver.freeze_conv = True
+  # The module level checkpoint constant is rgb_resnet_v2_50_path; no plain
+  # resnet_v2_50_path name exists in this module.
+  args.solver.pretrained_path = rgb_resnet_v2_50_path
+  task_params.img_channels = 5
+  args.solver.data_loss_wt = 0.00001
+
+  # 'v0' is the only known version and needs no extra settings; anything
+  # else is only logged.
+  if config_vars[0] != 'v0':
+    logging.error('config_name: %s undefined', config_name)
+
+  task_params.height = camera_param.height
+  task_params.width = camera_param.width
+  task_params.modalities = camera_param.modalities
+
+  # NOTE(review): get_args_for_mode_building_all / get_args_for_mode_building
+  # are not defined in the part of cfgs/config_common.py visible alongside
+  # this file -- confirm that they exist before relying on this path.
+  if config_vars[1] == 'all':
+    args = cc.get_args_for_mode_building_all(args, mode)
+  elif config_vars[1] == 'noall':
+    args = cc.get_args_for_mode_building(args, mode)
+
+  # Log the arguments
+  logging.error('%s', args)
+  return args