ModelZoo / ResNet50_tensorflow · Commit 44fa1d37

Authored Jun 29, 2017 by Alex Lee

    Merge remote-tracking branch 'upstream/master'

Parents: d3628a74, 6e367f67

Showing 20 of 293 changed files, with 3703 additions and 0 deletions (+3703 −0):
attention_ocr/python/model_test.py                               +181  −0
attention_ocr/python/sequence_layers.py                          +422  −0
attention_ocr/python/sequence_layers_test.py                     +112  −0
attention_ocr/python/train.py                                    +209  −0
attention_ocr/python/utils.py                                     +80  −0
cognitive_mapping_and_planning/.gitignore                          +4  −0
cognitive_mapping_and_planning/README.md                         +122  −0
cognitive_mapping_and_planning/__init__.py                         +0  −0
cognitive_mapping_and_planning/cfgs/__init__.py                    +0  −0
cognitive_mapping_and_planning/cfgs/config_cmp.py                +283  −0
cognitive_mapping_and_planning/cfgs/config_common.py             +261  −0
cognitive_mapping_and_planning/cfgs/config_distill.py            +114  −0
cognitive_mapping_and_planning/cfgs/config_vision_baseline.py    +173  −0
cognitive_mapping_and_planning/data/.gitignore                     +3  −0
cognitive_mapping_and_planning/data/README.md                     +33  −0
cognitive_mapping_and_planning/datasets/__init__.py                +0  −0
cognitive_mapping_and_planning/datasets/factory.py               +113  −0
cognitive_mapping_and_planning/datasets/nav_env.py              +1465  −0
cognitive_mapping_and_planning/datasets/nav_env_config.py        +127  −0
cognitive_mapping_and_planning/matplotlibrc                        +1  −0
attention_ocr/python/model_test.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the model."""
import
numpy
as
np
import
string
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
from
tensorflow.contrib.tfprof
import
model_analyzer
import
model
import
data_provider
def
create_fake_charset
(
num_char_classes
):
charset
=
{}
for
i
in
xrange
(
num_char_classes
):
charset
[
i
]
=
string
.
printable
[
i
%
len
(
string
.
printable
)]
return
charset
class
ModelTest
(
tf
.
test
.
TestCase
):
def
setUp
(
self
):
tf
.
test
.
TestCase
.
setUp
(
self
)
self
.
rng
=
np
.
random
.
RandomState
([
11
,
23
,
50
])
self
.
batch_size
=
4
self
.
image_width
=
600
self
.
image_height
=
30
self
.
seq_length
=
40
self
.
num_char_classes
=
72
self
.
null_code
=
62
self
.
num_views
=
4
feature_size
=
288
self
.
conv_tower_shape
=
(
self
.
batch_size
,
1
,
72
,
feature_size
)
self
.
features_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
feature_size
)
self
.
chars_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
)
self
.
length_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
+
1
)
self
.
initialize_fakes
()
def
initialize_fakes
(
self
):
self
.
images_shape
=
(
self
.
batch_size
,
self
.
image_height
,
self
.
image_width
,
3
)
self
.
fake_images
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
255
,
size
=
self
.
images_shape
).
astype
(
'float32'
),
name
=
'input_node'
)
self
.
fake_conv_tower_np
=
tf
.
constant
(
self
.
rng
.
randn
(
*
self
.
conv_tower_shape
).
astype
(
'float32'
))
self
.
fake_logits
=
tf
.
constant
(
self
.
rng
.
randn
(
*
self
.
chars_logit_shape
).
astype
(
'float32'
))
self
.
fake_labels
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
self
.
num_char_classes
,
size
=
(
self
.
batch_size
,
self
.
seq_length
)).
astype
(
'int64'
))
def
create_model
(
self
):
return
model
.
Model
(
self
.
num_char_classes
,
self
.
seq_length
,
num_views
=
4
,
null_code
=
62
)
def
test_char_related_shapes
(
self
):
ocr_model
=
self
.
create_model
()
with
self
.
test_session
()
as
sess
:
endpoints_tf
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
global_variables_initializer
())
endpoints
=
sess
.
run
(
endpoints_tf
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_logit
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_log_prob
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_chars
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_scores
.
shape
)
def
test_predicted_scores_are_within_range
(
self
):
ocr_model
=
self
.
create_model
()
_
,
_
,
scores
=
ocr_model
.
char_predictions
(
self
.
fake_logits
)
with
self
.
test_session
()
as
sess
:
scores_np
=
sess
.
run
(
scores
)
values_in_range
=
(
scores_np
>=
0.0
)
&
(
scores_np
<=
1.0
)
self
.
assertTrue
(
np
.
all
(
values_in_range
),
msg
=
(
'Scores contains out of the range values %s'
%
scores_np
[
np
.
logical_not
(
values_in_range
)]))
def
test_conv_tower_shape
(
self
):
with
self
.
test_session
()
as
sess
:
ocr_model
=
self
.
create_model
()
conv_tower
=
ocr_model
.
conv_tower_fn
(
self
.
fake_images
)
sess
.
run
(
tf
.
global_variables_initializer
())
conv_tower_np
=
sess
.
run
(
conv_tower
)
self
.
assertEqual
(
self
.
conv_tower_shape
,
conv_tower_np
.
shape
)
def
test_model_size_less_then1_gb
(
self
):
# NOTE: Actual amount of memory occupied my TF during training will be at
# least 4X times bigger because of space need to store original weights,
# updates, gradients and variances. It also depends on the type of used
# optimizer.
ocr_model
=
self
.
create_model
()
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
with
self
.
test_session
()
as
sess
:
tfprof_root
=
model_analyzer
.
print_model_analysis
(
sess
.
graph
,
tfprof_options
=
model_analyzer
.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS
)
model_size_bytes
=
4
*
tfprof_root
.
total_parameters
self
.
assertLess
(
model_size_bytes
,
1
*
2
**
30
)
def
test_create_summaries_is_runnable
(
self
):
ocr_model
=
self
.
create_model
()
data
=
data_provider
.
InputEndpoints
(
images
=
self
.
fake_images
,
images_orig
=
self
.
fake_images
,
labels
=
self
.
fake_labels
,
labels_one_hot
=
slim
.
one_hot_encoding
(
self
.
fake_labels
,
self
.
num_char_classes
))
endpoints
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
charset
=
create_fake_charset
(
self
.
num_char_classes
)
summaries
=
ocr_model
.
create_summaries
(
data
,
endpoints
,
charset
,
is_training
=
False
)
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
global_variables_initializer
())
sess
.
run
(
tf
.
local_variables_initializer
())
tf
.
tables_initializer
().
run
()
sess
.
run
(
summaries
)
# just check it is runnable
def
test_sequence_loss_function_without_label_smoothing
(
self
):
model
=
self
.
create_model
()
model
.
set_mparam
(
'sequence_loss_fn'
,
label_smoothing
=
0
)
loss
=
model
.
sequence_loss_fn
(
self
.
fake_logits
,
self
.
fake_labels
)
with
self
.
test_session
()
as
sess
:
loss_np
=
sess
.
run
(
loss
)
# This test checks that the loss function is 'runnable'.
self
.
assertEqual
(
loss_np
.
shape
,
tuple
())
class
CharsetMapperTest
(
tf
.
test
.
TestCase
):
def
test_text_corresponds_to_ids
(
self
):
charset
=
create_fake_charset
(
36
)
ids
=
tf
.
constant
(
[[
17
,
14
,
21
,
21
,
24
],
[
32
,
24
,
27
,
21
,
13
]],
dtype
=
tf
.
int64
)
charset_mapper
=
model
.
CharsetMapper
(
charset
)
with
self
.
test_session
()
as
sess
:
tf
.
tables_initializer
().
run
()
text
=
sess
.
run
(
charset_mapper
.
get_text
(
ids
))
self
.
assertAllEqual
(
text
,
[
'hello'
,
'world'
])
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
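An aside on why CharsetMapperTest expects 'hello' and 'world': create_fake_charset maps index i to string.printable[i], and string.printable begins with the ten digits followed by the lowercase alphabet, so small ids map directly onto letters. A quick standalone check (an editor's sketch, not part of the commit):

import string

charset = {i: string.printable[i % len(string.printable)] for i in range(36)}
print(''.join(charset[i] for i in [17, 14, 21, 21, 24]))  # hello
print(''.join(charset[i] for i in [32, 24, 27, 21, 13]))  # world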
attention_ocr/python/sequence_layers.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various implementations of sequence layers for character prediction.
A 'sequence layer' is a part of a computation graph which is responsible of
producing a sequence of characters using extracted image features. There are
many reasonable ways to implement such layers. All of them are using RNNs.
This module provides implementations which uses 'attention' mechanism to
spatially 'pool' image features and also can use a previously predicted
character to predict the next (aka auto regression).
Usage:
Select one of available classes, e.g. Attention or use a wrapper function to
pick one based on your requirements:
layer_class = sequence_layers.get_layer_class(use_attention=True,
use_autoregression=True)
layer = layer_class(net, labels_one_hot, model_params, method_params)
char_logits = layer.create_logits()
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
collections
import
abc
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
def
orthogonal_initializer
(
shape
,
dtype
=
tf
.
float32
,
*
args
,
**
kwargs
):
"""Generates orthonormal matrices with random values.
Orthonormal initialization is important for RNNs:
http://arxiv.org/abs/1312.6120
http://smerity.com/articles/2016/orthogonal_init.html
For non-square shapes the returned matrix will be semi-orthonormal: if the
number of columns exceeds the number of rows, then the rows are orthonormal
vectors; but if the number of rows exceeds the number of columns, then the
columns are orthonormal vectors.
We use SVD decomposition to generate an orthonormal matrix with random
values. The same way as it is done in the Lasagne library for Theano. Note
that both u and v returned by the svd are orthogonal and random. We just need
to pick one with the right shape.
Args:
shape: a shape of the tensor matrix to initialize.
dtype: a dtype of the initialized tensor.
*args: not used.
**kwargs: not used.
Returns:
An initialized tensor.
"""
del
args
del
kwargs
flat_shape
=
(
shape
[
0
],
np
.
prod
(
shape
[
1
:]))
w
=
np
.
random
.
randn
(
*
flat_shape
)
u
,
_
,
v
=
np
.
linalg
.
svd
(
w
,
full_matrices
=
False
)
w
=
u
if
u
.
shape
==
flat_shape
else
v
return
tf
.
constant
(
w
.
reshape
(
shape
),
dtype
=
dtype
)
SequenceLayerParams
=
collections
.
namedtuple
(
'SequenceLogitsParams'
,
[
'num_lstm_units'
,
'weight_decay'
,
'lstm_state_clip_value'
])
class
SequenceLayerBase
(
object
):
"""A base abstruct class for all sequence layers.
A child class has to define following methods:
get_train_input
get_eval_input
unroll_cell
"""
__metaclass__
=
abc
.
ABCMeta
def
__init__
(
self
,
net
,
labels_one_hot
,
model_params
,
method_params
):
"""Stores argument in member variable for further use.
Args:
net: A tensor with shape [batch_size, num_features, feature_size] which
contains some extracted image features.
labels_one_hot: An optional (can be None) ground truth labels for the
input features. Is a tensor with shape
[batch_size, seq_length, num_char_classes]
model_params: A namedtuple with model parameters (model.ModelParams).
method_params: A SequenceLayerParams instance.
"""
self
.
_params
=
model_params
self
.
_mparams
=
method_params
self
.
_net
=
net
self
.
_labels_one_hot
=
labels_one_hot
self
.
_batch_size
=
net
.
get_shape
().
dims
[
0
].
value
# Initialize parameters for char logits which will be computed on the fly
# inside an LSTM decoder.
self
.
_char_logits
=
{}
regularizer
=
slim
.
l2_regularizer
(
self
.
_mparams
.
weight_decay
)
self
.
_softmax_w
=
slim
.
model_variable
(
'softmax_w'
,
[
self
.
_mparams
.
num_lstm_units
,
self
.
_params
.
num_char_classes
],
initializer
=
orthogonal_initializer
,
regularizer
=
regularizer
)
self
.
_softmax_b
=
slim
.
model_variable
(
'softmax_b'
,
[
self
.
_params
.
num_char_classes
],
initializer
=
tf
.
zeros_initializer
(),
regularizer
=
regularizer
)
@
abc
.
abstractmethod
def
get_train_input
(
self
,
prev
,
i
):
"""Returns a sample to be used to predict a character during training.
This function is used as a loop_function for an RNN decoder.
Args:
prev: output tensor from previous step of the RNN. A tensor with shape:
[batch_size, num_char_classes].
i: index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, ?] - depth depends on implementation
details.
"""
pass
@
abc
.
abstractmethod
def
get_eval_input
(
self
,
prev
,
i
):
"""Returns a sample to be used to predict a character during inference.
This function is used as a loop_function for an RNN decoder.
Args:
prev: output tensor from previous step of the RNN. A tensor with shape:
[batch_size, num_char_classes].
i: index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, ?] - depth depends on implementation
details.
"""
raise
AssertionError
(
'Not implemented'
)
@
abc
.
abstractmethod
def
unroll_cell
(
self
,
decoder_inputs
,
initial_state
,
loop_function
,
cell
):
"""Unrolls an RNN cell for all inputs.
This is a placeholder to call some RNN decoder. It has a similar to
tf.seq2seq.rnn_decode interface.
Args:
decoder_inputs: A list of 2D Tensors* [batch_size x input_size]. In fact,
most of existing decoders in presence of a loop_function use only the
first element to determine batch_size and length of the list to
determine number of steps.
initial_state: 2D Tensor with shape [batch_size x cell.state_size].
loop_function: function will be applied to the i-th output in order to
generate the i+1-st input (see self.get_input).
cell: rnn_cell.RNNCell defining the cell function and size.
Returns:
A tuple of the form (outputs, state), where:
outputs: A list of character logits of the same length as
decoder_inputs of 2D Tensors with shape [batch_size x num_characters].
state: The state of each cell at the final time-step.
It is a 2D Tensor of shape [batch_size x cell.state_size].
"""
pass
def
is_training
(
self
):
"""Returns True if the layer is created for training stage."""
return
self
.
_labels_one_hot
is
not
None
def
char_logit
(
self
,
inputs
,
char_index
):
"""Creates logits for a character if required.
Args:
inputs: A tensor with shape [batch_size, ?] (depth is implementation
dependent).
char_index: A integer index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, num_char_classes]
"""
if
char_index
not
in
self
.
_char_logits
:
self
.
_char_logits
[
char_index
]
=
tf
.
nn
.
xw_plus_b
(
inputs
,
self
.
_softmax_w
,
self
.
_softmax_b
)
return
self
.
_char_logits
[
char_index
]
def
char_one_hot
(
self
,
logit
):
"""Creates one hot encoding for a logit of a character.
Args:
logit: A tensor with shape [batch_size, num_char_classes].
Returns:
A tensor with shape [batch_size, num_char_classes]
"""
prediction
=
tf
.
argmax
(
logit
,
dimension
=
1
)
return
slim
.
one_hot_encoding
(
prediction
,
self
.
_params
.
num_char_classes
)
def
get_input
(
self
,
prev
,
i
):
"""A wrapper for get_train_input and get_eval_input.
Args:
prev: output tensor from previous step of the RNN. A tensor with shape:
[batch_size, num_char_classes].
i: index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, ?] - depth depends on implementation
details.
"""
if
self
.
is_training
():
return
self
.
get_train_input
(
prev
,
i
)
else
:
return
self
.
get_eval_input
(
prev
,
i
)
def
create_logits
(
self
):
"""Creates character sequence logits for a net specified in the constructor.
A "main" method for the sequence layer which glues together all pieces.
Returns:
A tensor with shape [batch_size, seq_length, num_char_classes].
"""
with
tf
.
variable_scope
(
'LSTM'
):
first_label
=
self
.
get_input
(
prev
=
None
,
i
=
0
)
decoder_inputs
=
[
first_label
]
+
[
None
]
*
(
self
.
_params
.
seq_length
-
1
)
lstm_cell
=
tf
.
contrib
.
rnn
.
LSTMCell
(
self
.
_mparams
.
num_lstm_units
,
use_peepholes
=
False
,
cell_clip
=
self
.
_mparams
.
lstm_state_clip_value
,
state_is_tuple
=
True
,
initializer
=
orthogonal_initializer
)
lstm_outputs
,
_
=
self
.
unroll_cell
(
decoder_inputs
=
decoder_inputs
,
initial_state
=
lstm_cell
.
zero_state
(
self
.
_batch_size
,
tf
.
float32
),
loop_function
=
self
.
get_input
,
cell
=
lstm_cell
)
with
tf
.
variable_scope
(
'logits'
):
logits_list
=
[
tf
.
expand_dims
(
self
.
char_logit
(
logit
,
i
),
dim
=
1
)
for
i
,
logit
in
enumerate
(
lstm_outputs
)
]
return
tf
.
concat
(
logits_list
,
1
)
class
NetSlice
(
SequenceLayerBase
):
"""A layer which uses a subset of image features to predict each character.
"""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
NetSlice
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
_zero_label
=
tf
.
zeros
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
def
get_image_feature
(
self
,
char_index
):
"""Returns a subset of image features for a character.
Args:
char_index: an index of a character.
Returns:
A tensor with shape [batch_size, ?]. The output depth depends on the
depth of input net.
"""
batch_size
,
features_num
,
_
=
[
d
.
value
for
d
in
self
.
_net
.
get_shape
()]
slice_len
=
int
(
features_num
/
self
.
_params
.
seq_length
)
# In case when features_num != seq_length, we just pick a subset of image
# features, this choice is arbitrary and there is no intuitive geometrical
# interpretation. If features_num is not dividable by seq_length there will
# be unused image features.
net_slice
=
self
.
_net
[:,
char_index
:
char_index
+
slice_len
,
:]
feature
=
tf
.
reshape
(
net_slice
,
[
batch_size
,
-
1
])
logging
.
debug
(
'Image feature: %s'
,
feature
)
return
feature
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
del
prev
return
self
.
get_image_feature
(
i
)
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
return
self
.
get_eval_input
(
prev
,
i
)
def
unroll_cell
(
self
,
decoder_inputs
,
initial_state
,
loop_function
,
cell
):
"""See SequenceLayerBase.unroll_cell for details."""
return
tf
.
contrib
.
legacy_seq2seq
.
rnn_decoder
(
decoder_inputs
=
decoder_inputs
,
initial_state
=
initial_state
,
cell
=
cell
,
loop_function
=
self
.
get_input
)
class
NetSliceWithAutoregression
(
NetSlice
):
"""A layer similar to NetSlice, but it also uses auto regression.
The "auto regression" means that we use network output for previous character
as a part of input for the current character.
"""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
NetSliceWithAutoregression
,
self
).
__init__
(
*
args
,
**
kwargs
)
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
if
i
==
0
:
prev
=
self
.
_zero_label
else
:
logit
=
self
.
char_logit
(
prev
,
char_index
=
i
-
1
)
prev
=
self
.
char_one_hot
(
logit
)
image_feature
=
self
.
get_image_feature
(
char_index
=
i
)
return
tf
.
concat
([
image_feature
,
prev
],
1
)
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
if
i
==
0
:
prev
=
self
.
_zero_label
else
:
prev
=
self
.
_labels_one_hot
[:,
i
-
1
,
:]
image_feature
=
self
.
get_image_feature
(
i
)
return
tf
.
concat
([
image_feature
,
prev
],
1
)
class
Attention
(
SequenceLayerBase
):
"""A layer which uses attention mechanism to select image features."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
Attention
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
_zero_label
=
tf
.
zeros
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
del
prev
,
i
# The attention_decoder will fetch image features from the net, no need for
# extra inputs.
return
self
.
_zero_label
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
return
self
.
get_eval_input
(
prev
,
i
)
def
unroll_cell
(
self
,
decoder_inputs
,
initial_state
,
loop_function
,
cell
):
return
tf
.
contrib
.
legacy_seq2seq
.
attention_decoder
(
decoder_inputs
=
decoder_inputs
,
initial_state
=
initial_state
,
attention_states
=
self
.
_net
,
cell
=
cell
,
loop_function
=
self
.
get_input
)
class
AttentionWithAutoregression
(
Attention
):
"""A layer which uses both attention and auto regression."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
AttentionWithAutoregression
,
self
).
__init__
(
*
args
,
**
kwargs
)
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
if
i
==
0
:
return
self
.
_zero_label
else
:
# TODO(gorban): update to gradually introduce gt labels.
return
self
.
_labels_one_hot
[:,
i
-
1
,
:]
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
if
i
==
0
:
return
self
.
_zero_label
else
:
logit
=
self
.
char_logit
(
prev
,
char_index
=
i
-
1
)
return
self
.
char_one_hot
(
logit
)
def
get_layer_class
(
use_attention
,
use_autoregression
):
"""A convenience function to get a layer class based on requirements.
Args:
use_attention: if True a returned class will use attention.
use_autoregression: if True a returned class will use auto regression.
Returns:
One of available sequence layers (child classes for SequenceLayerBase).
"""
if
use_attention
and
use_autoregression
:
layer_class
=
AttentionWithAutoregression
elif
use_attention
and
not
use_autoregression
:
layer_class
=
Attention
elif
not
use_attention
and
not
use_autoregression
:
layer_class
=
NetSlice
elif
not
use_attention
and
use_autoregression
:
layer_class
=
NetSliceWithAutoregression
else
:
raise
AssertionError
(
'Unsupported sequence layer class'
)
logging
.
debug
(
'Use %s as a layer class'
,
layer_class
.
__name__
)
return
layer_class
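The SVD trick in orthogonal_initializer is easy to verify in plain numpy; the following sketch (not part of the commit) checks that for a wide matrix the chosen factor has orthonormal rows, matching the docstring's claim about semi-orthonormal results:

import numpy as np

flat_shape = (4, 6)  # fewer rows than columns
w = np.random.randn(*flat_shape)
u, _, v = np.linalg.svd(w, full_matrices=False)  # u: (4, 4), v: (4, 6)
w = u if u.shape == flat_shape else v            # picks v here
print(np.allclose(w.dot(w.T), np.eye(4)))        # True: rows are orthonormal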
attention_ocr/python/sequence_layers_test.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sequence_layers."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
import
model
import
sequence_layers
def
fake_net
(
batch_size
,
num_features
,
feature_size
):
return
tf
.
convert_to_tensor
(
np
.
random
.
uniform
(
size
=
(
batch_size
,
num_features
,
feature_size
)),
dtype
=
tf
.
float32
)
def
fake_labels
(
batch_size
,
seq_length
,
num_char_classes
):
labels_np
=
tf
.
convert_to_tensor
(
np
.
random
.
randint
(
low
=
0
,
high
=
num_char_classes
,
size
=
(
batch_size
,
seq_length
)))
return
slim
.
one_hot_encoding
(
labels_np
,
num_classes
=
num_char_classes
)
def
create_layer
(
layer_class
,
batch_size
,
seq_length
,
num_char_classes
):
model_params
=
model
.
ModelParams
(
num_char_classes
=
num_char_classes
,
seq_length
=
seq_length
,
num_views
=
1
,
null_code
=
num_char_classes
)
net
=
fake_net
(
batch_size
=
batch_size
,
num_features
=
seq_length
*
5
,
feature_size
=
6
)
labels_one_hot
=
fake_labels
(
batch_size
,
seq_length
,
num_char_classes
)
layer_params
=
sequence_layers
.
SequenceLayerParams
(
num_lstm_units
=
10
,
weight_decay
=
0.00004
,
lstm_state_clip_value
=
10.0
)
return
layer_class
(
net
,
labels_one_hot
,
model_params
,
layer_params
)
class
SequenceLayersTest
(
tf
.
test
.
TestCase
):
def
test_net_slice_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
NetSlice
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
def
test_net_slice_with_autoregression_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
NetSliceWithAutoregression
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
def
test_attention_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
Attention
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
def
test_attention_with_autoregression_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
AttentionWithAutoregression
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
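The four shape tests above differ only in the class under test. A compact sketch (not part of the commit) of how the module's get_layer_class switch maps onto those same four classes:

import sequence_layers

cases = {
    (True, True): sequence_layers.AttentionWithAutoregression,
    (True, False): sequence_layers.Attention,
    (False, True): sequence_layers.NetSliceWithAutoregression,
    (False, False): sequence_layers.NetSlice,
}
for (use_attention, use_autoregression), expected in cases.items():
  layer_class = sequence_layers.get_layer_class(use_attention,
                                                use_autoregression)
  assert layer_class is expected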
attention_ocr/python/train.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to train the Attention OCR model.
A simple usage example:
python train.py
"""
import
collections
import
logging
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
from
tensorflow
import
app
from
tensorflow.python.platform
import
flags
from
tensorflow.contrib.tfprof
import
model_analyzer
import
data_provider
import
common_flags
FLAGS
=
flags
.
FLAGS
common_flags
.
define
()
# yapf: disable
flags
.
DEFINE_integer
(
'task'
,
0
,
'The Task ID. This value is used when training with '
'multiple workers to identify each worker.'
)
flags
.
DEFINE_integer
(
'ps_tasks'
,
0
,
'The number of parameter servers. If the value is 0, then'
' the parameters are handled locally by the worker.'
)
flags
.
DEFINE_integer
(
'save_summaries_secs'
,
60
,
'The frequency with which summaries are saved, in '
'seconds.'
)
flags
.
DEFINE_integer
(
'save_interval_secs'
,
600
,
'Frequency in seconds of saving the model.'
)
flags
.
DEFINE_integer
(
'max_number_of_steps'
,
int
(
1e10
),
'The maximum number of gradient steps.'
)
flags
.
DEFINE_string
(
'checkpoint_inception'
,
''
,
'Checkpoint to recover inception weights from.'
)
flags
.
DEFINE_float
(
'clip_gradient_norm'
,
2.0
,
'If greater than 0 then the gradients would be clipped by '
'it.'
)
flags
.
DEFINE_bool
(
'sync_replicas'
,
False
,
'If True will synchronize replicas during training.'
)
flags
.
DEFINE_integer
(
'replicas_to_aggregate'
,
1
,
'The number of gradients updates before updating params.'
)
flags
.
DEFINE_integer
(
'total_num_replicas'
,
1
,
'Total number of worker replicas.'
)
flags
.
DEFINE_integer
(
'startup_delay_steps'
,
15
,
'Number of training steps between replicas startup.'
)
flags
.
DEFINE_boolean
(
'reset_train_dir'
,
False
,
'If true will delete all files in the train_log_dir'
)
flags
.
DEFINE_boolean
(
'show_graph_stats'
,
False
,
'Output model size stats to stderr.'
)
# yapf: enable
TrainingHParams
=
collections
.
namedtuple
(
'TrainingHParams'
,
[
'learning_rate'
,
'optimizer'
,
'momentum'
,
'use_augment_input'
,
])
def
get_training_hparams
():
return
TrainingHParams
(
learning_rate
=
FLAGS
.
learning_rate
,
optimizer
=
FLAGS
.
optimizer
,
momentum
=
FLAGS
.
momentum
,
use_augment_input
=
FLAGS
.
use_augment_input
)
def
create_optimizer
(
hparams
):
"""Creates optimized based on the specified flags."""
if
hparams
.
optimizer
==
'momentum'
:
optimizer
=
tf
.
train
.
MomentumOptimizer
(
hparams
.
learning_rate
,
momentum
=
hparams
.
momentum
)
elif
hparams
.
optimizer
==
'adam'
:
optimizer
=
tf
.
train
.
AdamOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'adadelta'
:
optimizer
=
tf
.
train
.
AdadeltaOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'adagrad'
:
optimizer
=
tf
.
train
.
AdagradOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'rmsprop'
:
optimizer
=
tf
.
train
.
RMSPropOptimizer
(
hparams
.
learning_rate
,
momentum
=
hparams
.
momentum
)
return
optimizer
def
train
(
loss
,
init_fn
,
hparams
):
"""Wraps slim.learning.train to run a training loop.
Args:
loss: a loss tensor
init_fn: A callable to be executed after all other initialization is done.
hparams: a model hyper parameters
"""
optimizer
=
create_optimizer
(
hparams
)
if
FLAGS
.
sync_replicas
:
replica_id
=
tf
.
constant
(
FLAGS
.
task
,
tf
.
int32
,
shape
=
())
optimizer
=
tf
.
LegacySyncReplicasOptimizer
(
opt
=
optimizer
,
replicas_to_aggregate
=
FLAGS
.
replicas_to_aggregate
,
replica_id
=
replica_id
,
total_num_replicas
=
FLAGS
.
total_num_replicas
)
sync_optimizer
=
optimizer
startup_delay_steps
=
0
else
:
startup_delay_steps
=
0
sync_optimizer
=
None
train_op
=
slim
.
learning
.
create_train_op
(
loss
,
optimizer
,
summarize_gradients
=
True
,
clip_gradient_norm
=
FLAGS
.
clip_gradient_norm
)
slim
.
learning
.
train
(
train_op
=
train_op
,
logdir
=
FLAGS
.
train_log_dir
,
graph
=
loss
.
graph
,
master
=
FLAGS
.
master
,
is_chief
=
(
FLAGS
.
task
==
0
),
number_of_steps
=
FLAGS
.
max_number_of_steps
,
save_summaries_secs
=
FLAGS
.
save_summaries_secs
,
save_interval_secs
=
FLAGS
.
save_interval_secs
,
startup_delay_steps
=
startup_delay_steps
,
sync_optimizer
=
sync_optimizer
,
init_fn
=
init_fn
)
def
prepare_training_dir
():
if
not
tf
.
gfile
.
Exists
(
FLAGS
.
train_log_dir
):
logging
.
info
(
'Create a new training directory %s'
,
FLAGS
.
train_log_dir
)
tf
.
gfile
.
MakeDirs
(
FLAGS
.
train_log_dir
)
else
:
if
FLAGS
.
reset_train_dir
:
logging
.
info
(
'Reset the training directory %s'
,
FLAGS
.
train_log_dir
)
tf
.
gfile
.
DeleteRecursively
(
FLAGS
.
train_log_dir
)
tf
.
gfile
.
MakeDirs
(
FLAGS
.
train_log_dir
)
else
:
logging
.
info
(
'Use already existing training directory %s'
,
FLAGS
.
train_log_dir
)
def
calculate_graph_metrics
():
param_stats
=
model_analyzer
.
print_model_analysis
(
tf
.
get_default_graph
(),
tfprof_options
=
model_analyzer
.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS
)
return
param_stats
.
total_parameters
def
main
(
_
):
prepare_training_dir
()
dataset
=
common_flags
.
create_dataset
(
split_name
=
FLAGS
.
split_name
)
model
=
common_flags
.
create_model
(
dataset
.
num_char_classes
,
dataset
.
max_sequence_length
,
dataset
.
num_of_views
,
dataset
.
null_code
)
hparams
=
get_training_hparams
()
# If ps_tasks is zero, the local device is used. When using multiple
# (non-local) replicas, the ReplicaDeviceSetter distributes the variables
# across the different devices.
device_setter
=
tf
.
train
.
replica_device_setter
(
FLAGS
.
ps_tasks
,
merge_devices
=
True
)
with
tf
.
device
(
device_setter
):
data
=
data_provider
.
get_data
(
dataset
,
FLAGS
.
batch_size
,
augment
=
hparams
.
use_augment_input
,
central_crop_size
=
common_flags
.
get_crop_size
())
endpoints
=
model
.
create_base
(
data
.
images
,
data
.
labels_one_hot
)
total_loss
=
model
.
create_loss
(
data
,
endpoints
)
model
.
create_summaries
(
data
,
endpoints
,
dataset
.
charset
,
is_training
=
True
)
init_fn
=
model
.
create_init_fn_to_restore
(
FLAGS
.
checkpoint
,
FLAGS
.
checkpoint_inception
)
if
FLAGS
.
show_graph_stats
:
logging
.
info
(
'Total number of weights in the graph: %s'
,
calculate_graph_metrics
())
train
(
total_loss
,
init_fn
,
hparams
)
if
__name__
==
'__main__'
:
app
.
run
()
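For reference, a minimal sketch (not part of the commit; the hyper-parameter values here are made up) of how create_optimizer consumes a TrainingHParams tuple under the TF 1.x API used above:

hparams = TrainingHParams(
    learning_rate=0.004,
    optimizer='momentum',
    momentum=0.9,
    use_augment_input=True)
optimizer = create_optimizer(hparams)  # a tf.train.MomentumOptimizer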
attention_ocr/python/utils.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to support building models for StreetView text transcription."""
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
def
logits_to_log_prob
(
logits
):
"""Computes log probabilities using numerically stable trick.
This uses two numerical stability tricks:
1) softmax(x) = softmax(x - c) where c is a constant applied to all
arguments. If we set c = max(x) then the softmax is more numerically
stable.
2) log softmax(x) is not numerically stable, but we can stabilize it
by using the identity log softmax(x) = x - log sum exp(x)
Args:
logits: Tensor of arbitrary shape whose last dimension contains logits.
Returns:
A tensor of the same shape as the input, but with corresponding log
probabilities.
"""
with
tf
.
variable_scope
(
'log_probabilities'
):
reduction_indices
=
len
(
logits
.
shape
.
as_list
())
-
1
max_logits
=
tf
.
reduce_max
(
logits
,
reduction_indices
=
reduction_indices
,
keep_dims
=
True
)
safe_logits
=
tf
.
subtract
(
logits
,
max_logits
)
sum_exp
=
tf
.
reduce_sum
(
tf
.
exp
(
safe_logits
),
reduction_indices
=
reduction_indices
,
keep_dims
=
True
)
log_probs
=
tf
.
subtract
(
safe_logits
,
tf
.
log
(
sum_exp
))
return
log_probs
def
variables_to_restore
(
scope
=
None
,
strip_scope
=
False
):
"""Returns a list of variables to restore for the specified list of methods.
It is supposed that variable name starts with the method's scope (a prefix
returned by _method_scope function).
Args:
methods_names: a list of names of configurable methods.
strip_scope: if True will return variable names without method's scope.
If methods_names is None will return names unchanged.
model_scope: a scope for a whole model.
Returns:
a dictionary mapping variable names to variables for restore.
"""
if
scope
:
variable_map
=
{}
method_variables
=
slim
.
get_variables_to_restore
(
include
=
[
scope
])
for
var
in
method_variables
:
if
strip_scope
:
var_name
=
var
.
op
.
name
[
len
(
scope
)
+
1
:]
else
:
var_name
=
var
.
op
.
name
variable_map
[
var_name
]
=
var
return
variable_map
else
:
return
{
v
.
op
.
name
:
v
for
v
in
slim
.
get_variables_to_restore
()}
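The value of the max-subtraction trick in logits_to_log_prob is easy to see in numpy; a sketch (not part of the commit) where the naive formula overflows but the stabilized identity does not:

import numpy as np

def naive_log_softmax(x):
  return np.log(np.exp(x) / np.exp(x).sum(axis=-1, keepdims=True))

def stable_log_softmax(x):
  x = x - x.max(axis=-1, keepdims=True)  # trick 1: shift by the max
  return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))  # trick 2

logits = np.array([[1000.0, 999.0, 998.0]])
print(naive_log_softmax(logits))   # [[nan nan nan]] -- exp overflows
print(stable_log_softmax(logits))  # [[-0.408 -1.408 -2.408]]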
cognitive_mapping_and_planning/.gitignore (new file, 0 → 100644)
deps
*.pyc
lib*.so
lib*.so*
cognitive_mapping_and_planning/README.md (new file, 0 → 100644)
# Cognitive Mapping and Planning for Visual Navigation

**Saurabh Gupta, James Davidson, Sergey Levine, Rahul Sukthankar, Jitendra Malik**

**Computer Vision and Pattern Recognition (CVPR) 2017.**

**[ArXiv](https://arxiv.org/abs/1702.03920), [Project Website](https://sites.google.com/corp/view/cognitive-mapping-and-planning/)**

### Citing
If you find this code base and models useful in your research, please consider
citing the following paper:
```
@inproceedings{gupta2017cognitive,
  title={Cognitive Mapping and Planning for Visual Navigation},
  author={Gupta, Saurabh and Davidson, James and Levine, Sergey and
    Sukthankar, Rahul and Malik, Jitendra},
  booktitle={CVPR},
  year={2017}
}
```

### Contents
1. [Requirements: software](#requirements-software)
2. [Requirements: data](#requirements-data)
3. [Test Pre-trained Models](#test-pre_trained-models)
4. [Train your Own Models](#train-your-own-models)

### Requirements: software
1. Python Virtual Env Setup: All code is implemented in Python but depends on a
small number of Python packages and a couple of C libraries. We recommend
using a virtual environment for installing these Python packages and the
Python bindings for these C libraries.
```Shell
VENV_DIR=venv
pip install virtualenv
virtualenv $VENV_DIR
source $VENV_DIR/bin/activate
# You may need to upgrade pip for installing opencv-python.
pip install --upgrade pip
# Install simple dependencies.
pip install -r requirements.txt
# Patch bugs in dependencies.
sh patches/apply_patches.sh
```
2. Install [Tensorflow](https://www.tensorflow.org/) inside this virtual
environment. Typically done with `pip install --upgrade tensorflow-gpu`.
3. Swiftshader: We use [Swiftshader](https://github.com/google/swiftshader.git),
a CPU based renderer, to render the meshes. It is possible to use other
renderers; to do so, replace `SwiftshaderRenderer` in
`render/swiftshader_renderer.py` with bindings to your renderer.
```Shell
mkdir -p deps
git clone --recursive https://github.com/google/swiftshader.git deps/swiftshader-src
cd deps/swiftshader-src && git checkout 91da6b00584afd7dcaed66da88e2b617429b3950
mkdir build && cd build && cmake .. && make -j 16 libEGL libGLESv2
cd ../../../
cp deps/swiftshader-src/build/libEGL* libEGL.so.1
cp deps/swiftshader-src/build/libGLESv2* libGLESv2.so.2
```
4. PyAssimp: We use [PyAssimp](https://github.com/assimp/assimp.git) to load
meshes. It is possible to use other libraries to load meshes; to do so,
replace `Shape` in `render/swiftshader_renderer.py` with bindings to your
library for loading meshes.
```Shell
mkdir -p deps
git clone https://github.com/assimp/assimp.git deps/assimp-src
cd deps/assimp-src
git checkout 2afeddd5cb63d14bc77b53740b38a54a97d94ee8
cmake CMakeLists.txt -G 'Unix Makefiles' && make -j 16
cd port/PyAssimp && python setup.py install
cd ../../../..
cp deps/assimp-src/lib/libassimp* .
```
5. graph-tool: We use the [graph-tool](https://git.skewed.de/count0/graph-tool)
library for graph processing.
```Shell
mkdir -p deps
# If the following git clone command fails, you can also download the source
# from https://downloads.skewed.de/graph-tool/graph-tool-2.2.44.tar.bz2
git clone https://git.skewed.de/count0/graph-tool deps/graph-tool-src
cd deps/graph-tool-src && git checkout 178add3a571feb6666f4f119027705d95d2951ab
bash autogen.sh
./configure --disable-cairo --disable-sparsehash --prefix=$HOME/.local
make -j 16
make install
cd ../../
```

### Requirements: data
1. Download the Stanford 3D Indoor Spaces Dataset (S3DIS Dataset) and ImageNet
pre-trained models for initializing different models. Follow the instructions
in `data/README.md`.

### Test Pre-trained Models
1. Download pre-trained models using
`scripts/scripts_download_pretrained_models.sh`.
2. Test models using `scripts/script_test_pretrained_models.sh`.

### Train Your Own Models
All models were trained asynchronously with 16 workers, each worker using data
from a single floor. The default hyper-parameters correspond to this setting.
See [distributed training with
Tensorflow](https://www.tensorflow.org/deploy/distributed) for setting up
distributed training. Training with a single worker is possible with the
current code base but will require some minor changes to allow each worker to
load all training environments.

### Contact
For questions or issues open an issue on the tensorflow/models
[issues tracker](https://github.com/tensorflow/models/issues). Please assign
issues to @s-gupta.

### Credits
This code was written by Saurabh Gupta (@s-gupta).
cognitive_mapping_and_planning/__init__.py (new empty file, 0 → 100644)
cognitive_mapping_and_planning/cfgs/__init__.py (new empty file, 0 → 100644)
cognitive_mapping_and_planning/cfgs/config_cmp.py (new file, 0 → 100644)
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os, sys
import numpy as np
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import logging
import src.utils as utils
import cfgs.config_common as cc

import tensorflow as tf

rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169'
d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002'


def get_default_args():
  summary_args = utils.Foo(display_interval=1, test_iters=26,
                           arop_full_summary_iters=14)
  control_args = utils.Foo(train=False, test=False,
                           force_batchnorm_is_training_at_test=False,
                           reset_rng_seed=False, only_eval_when_done=False,
                           test_mode=None)
  return summary_args, control_args


def get_default_cmp_args():
  batch_norm_param = {'center': True, 'scale': True,
                      'activation_fn': tf.nn.relu}

  mapper_arch_args = utils.Foo(
      dim_reduce_neurons=64,
      fc_neurons=[1024, 1024],
      fc_out_size=8,
      fc_out_neurons=64,
      encoder='resnet_v2_50',
      deconv_neurons=[64, 32, 16, 8, 4, 2],
      deconv_strides=[2, 2, 2, 2, 2, 2],
      deconv_layers_per_block=2,
      deconv_kernel_size=4,
      fc_dropout=0.5,
      combine_type='wt_avg_logits',
      batch_norm_param=batch_norm_param)

  readout_maps_arch_args = utils.Foo(
      num_neurons=[],
      strides=[],
      kernel_size=None,
      layers_per_block=None)

  arch_args = utils.Foo(
      vin_val_neurons=8, vin_action_neurons=8, vin_ks=3, vin_share_wts=False,
      pred_neurons=[64, 64], pred_batch_norm_param=batch_norm_param,
      conv_on_value_map=0, fr_neurons=16, fr_ver='v2', fr_inside_neurons=64,
      fr_stride=1, crop_remove_each=30, value_crop_size=4,
      action_sample_type='sample', action_sample_combine_type='one_or_other',
      sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True,
      vin_num_iters=36, isd_k=750., use_agent_loc=False, multi_scale=True,
      readout_maps=False, rom_arch=readout_maps_arch_args)

  return arch_args, mapper_arch_args


def get_arch_vars(arch_str):
  if arch_str == '':
    vals = []
  else:
    vals = arch_str.split('_')

  ks = ['var1', 'var2', 'var3']
  ks = ks[:len(vals)]

  # Exp Ver.
  if len(vals) == 0: ks.append('var1'); vals.append('v0')
  # custom arch.
  if len(vals) == 1: ks.append('var2'); vals.append('')
  # map scale for projection baseline.
  if len(vals) == 2: ks.append('var3'); vals.append('fr2')

  assert(len(vals) == 3)

  vars = utils.Foo()
  for k, v in zip(ks, vals):
    setattr(vars, k, v)

  logging.error('arch_vars: %s', vars)
  return vars


def process_arch_str(args, arch_str):
  # This function modifies args.
  args.arch, args.mapper_arch = get_default_cmp_args()

  arch_vars = get_arch_vars(arch_str)

  args.navtask.task_params.outputs.ego_maps = True
  args.navtask.task_params.outputs.ego_goal_imgs = True
  args.navtask.task_params.outputs.egomotion = True
  args.navtask.task_params.toy_problem = False

  if arch_vars.var1 == 'lmap':
    args = process_arch_learned_map(args, arch_vars)
  elif arch_vars.var1 == 'pmap':
    args = process_arch_projected_map(args, arch_vars)
  else:
    logging.fatal('arch_vars.var1 should be lmap or pmap, but is %s',
                  arch_vars.var1)
    assert(False)

  return args


def process_arch_learned_map(args, arch_vars):
  # Multiscale vision based system.
  args.navtask.task_params.input_type = 'vision'
  args.navtask.task_params.outputs.images = True

  if args.navtask.camera_param.modalities[0] == 'rgb':
    args.solver.pretrained_path = rgb_resnet_v2_50_path
  elif args.navtask.camera_param.modalities[0] == 'depth':
    args.solver.pretrained_path = d_resnet_v2_50_path

  if arch_vars.var2 == 'Ssc':
    sc = 1. / args.navtask.task_params.step_size
    args.arch.vin_num_iters = 40
    args.navtask.task_params.map_scales = [sc]
    max_dist = args.navtask.task_params.max_dist * \
        args.navtask.task_params.num_goals
    args.navtask.task_params.map_crop_sizes = [2 * max_dist]

    args.arch.fr_stride = 1
    args.arch.vin_action_neurons = 8
    args.arch.vin_val_neurons = 3
    args.arch.fr_inside_neurons = 32

    args.mapper_arch.pad_map_with_zeros_each = [24]
    args.mapper_arch.deconv_neurons = [64, 32, 16]
    args.mapper_arch.deconv_strides = [1, 2, 1]

  elif (arch_vars.var2 == 'Msc' or arch_vars.var2 == 'MscROMms' or
        arch_vars.var2 == 'MscROMss' or arch_vars.var2 == 'MscNoVin'):
    # Code for multi-scale planner.
    args.arch.vin_num_iters = 8
    args.arch.crop_remove_each = 4
    args.arch.value_crop_size = 8

    sc = 1. / args.navtask.task_params.step_size
    max_dist = args.navtask.task_params.max_dist * \
        args.navtask.task_params.num_goals
    n_scales = np.log2(float(max_dist) / float(args.arch.vin_num_iters))
    n_scales = int(np.ceil(n_scales) + 1)

    args.navtask.task_params.map_scales = \
        list(sc * (0.5 ** (np.arange(n_scales))[::-1]))
    args.navtask.task_params.map_crop_sizes = [16 for x in range(n_scales)]

    args.arch.fr_stride = 1
    args.arch.vin_action_neurons = 8
    args.arch.vin_val_neurons = 3
    args.arch.fr_inside_neurons = 32

    args.mapper_arch.pad_map_with_zeros_each = [0 for _ in range(n_scales)]
    args.mapper_arch.deconv_neurons = [64 * n_scales, 32 * n_scales,
                                       16 * n_scales]
    args.mapper_arch.deconv_strides = [1, 2, 1]

    if arch_vars.var2 == 'MscNoVin':
      # No planning version.
      args.arch.fr_stride = [1, 2, 1, 2]
      args.arch.vin_action_neurons = None
      args.arch.vin_val_neurons = 16
      args.arch.fr_inside_neurons = 32

      args.arch.crop_remove_each = 0
      args.arch.value_crop_size = 4
      args.arch.vin_num_iters = 0

    elif arch_vars.var2 == 'MscROMms' or arch_vars.var2 == 'MscROMss':
      # Code with read outs, MscROMms flattens and reads out,
      # MscROMss does not flatten and produces output at multiple scales.
      args.navtask.task_params.outputs.readout_maps = True
      args.navtask.task_params.map_resize_method = 'antialiasing'
      args.arch.readout_maps = True

      if arch_vars.var2 == 'MscROMms':
        args.arch.rom_arch.num_neurons = [64, 1]
        args.arch.rom_arch.kernel_size = 4
        args.arch.rom_arch.strides = [2, 2]
        args.arch.rom_arch.layers_per_block = 2

        args.navtask.task_params.readout_maps_crop_sizes = [64]
        args.navtask.task_params.readout_maps_scales = [sc]

      elif arch_vars.var2 == 'MscROMss':
        args.arch.rom_arch.num_neurons = \
            [64, len(args.navtask.task_params.map_scales)]
        args.arch.rom_arch.kernel_size = 4
        args.arch.rom_arch.strides = [1, 1]
        args.arch.rom_arch.layers_per_block = 1

        args.navtask.task_params.readout_maps_crop_sizes = \
            args.navtask.task_params.map_crop_sizes
        args.navtask.task_params.readout_maps_scales = \
            args.navtask.task_params.map_scales

  else:
    logging.fatal('arch_vars.var2 not one of Msc, MscROMms, MscROMss, '
                  'MscNoVin.')
    assert(False)

  map_channels = args.mapper_arch.deconv_neurons[-1] / \
      (2 * len(args.navtask.task_params.map_scales))
  args.navtask.task_params.map_channels = map_channels

  return args


def process_arch_projected_map(args, arch_vars):
  # Single scale vision based system which does not use a mapper but instead
  # uses an analytically estimated map.
  ds = int(arch_vars.var3[2])
  args.navtask.task_params.input_type = 'analytical_counts'
  args.navtask.task_params.outputs.analytical_counts = True

  assert(args.navtask.task_params.modalities[0] == 'depth')
  args.navtask.camera_param.img_channels = None

  analytical_counts = utils.Foo(map_sizes=[512 / ds],
                                xy_resolution=[5. * ds],
                                z_bins=[[-10, 10, 150, 200]],
                                non_linearity=[arch_vars.var2])
  args.navtask.task_params.analytical_counts = analytical_counts

  sc = 1. / ds
  args.arch.vin_num_iters = 36
  args.navtask.task_params.map_scales = [sc]
  args.navtask.task_params.map_crop_sizes = [512 / ds]

  args.arch.fr_stride = [1, 2]
  args.arch.vin_action_neurons = 8
  args.arch.vin_val_neurons = 3
  args.arch.fr_inside_neurons = 32

  map_channels = len(analytical_counts.z_bins[0]) + 1
  args.navtask.task_params.map_channels = map_channels
  args.solver.freeze_conv = False

  return args


def get_args_for_config(config_name):
  args = utils.Foo()

  args.summary, args.control = get_default_args()

  exp_name, mode_str = config_name.split('+')
  arch_str, solver_str, navtask_str = exp_name.split('.')
  logging.error('config_name: %s', config_name)
  logging.error('arch_str: %s', arch_str)
  logging.error('navtask_str: %s', navtask_str)
  logging.error('solver_str: %s', solver_str)
  logging.error('mode_str: %s', mode_str)

  args.solver = cc.process_solver_str(solver_str)
  args.navtask = cc.process_navtask_str(navtask_str)

  args = process_arch_str(args, arch_str)
  args.arch.isd_k = args.solver.isd_k

  # Train, test, etc.
  mode, imset = mode_str.split('_')
  args = cc.adjust_args_for_mode(args, mode)
  args.navtask.building_names = args.navtask.dataset.get_split(imset)
  args.control.test_name = '{:s}_on_{:s}'.format(mode, imset)

  # Log the arguments
  logging.error('%s', args)
  return args
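The positional convention in get_arch_vars can be summarized in a few lines; a standalone sketch (not part of the commit) of the same parse, with the defaults 'v0', '' and 'fr2' filling in missing fields:

def parse_arch_str(arch_str):
  vals = arch_str.split('_') if arch_str else []
  defaults = ['v0', '', 'fr2']
  vals = vals + defaults[len(vals):]
  return dict(zip(['var1', 'var2', 'var3'], vals))

print(parse_arch_str('lmap_Msc'))  # {'var1': 'lmap', 'var2': 'Msc', 'var3': 'fr2'}
print(parse_arch_str(''))          # {'var1': 'v0', 'var2': '', 'var3': 'fr2'}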
cognitive_mapping_and_planning/cfgs/config_common.py (new file, 0 → 100644)
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import
os
import
numpy
as
np
import
logging
import
src.utils
as
utils
import
datasets.nav_env_config
as
nec
from
datasets
import
factory
def
adjust_args_for_mode
(
args
,
mode
):
if
mode
==
'train'
:
args
.
control
.
train
=
True
elif
mode
==
'val1'
:
# Same settings as for training, to make sure nothing wonky is happening
# there.
args
.
control
.
test
=
True
args
.
control
.
test_mode
=
'val'
args
.
navtask
.
task_params
.
batch_size
=
32
elif
mode
==
'val2'
:
# No data augmentation, not sampling but taking the argmax action, not
# sampling from the ground truth at all.
args
.
control
.
test
=
True
args
.
arch
.
action_sample_type
=
'argmax'
args
.
arch
.
sample_gt_prob_type
=
'zero'
args
.
navtask
.
task_params
.
data_augment
=
\
utils
.
Foo
(
lr_flip
=
0
,
delta_angle
=
0
,
delta_xy
=
0
,
relight
=
False
,
relight_fast
=
False
,
structured
=
False
)
args
.
control
.
test_mode
=
'val'
args
.
navtask
.
task_params
.
batch_size
=
32
elif
mode
==
'bench'
:
# Actually testing the agent in settings that are kept same between
# different runs.
args
.
navtask
.
task_params
.
batch_size
=
16
args
.
control
.
test
=
True
args
.
arch
.
action_sample_type
=
'argmax'
args
.
arch
.
sample_gt_prob_type
=
'zero'
args
.
navtask
.
task_params
.
data_augment
=
\
utils
.
Foo
(
lr_flip
=
0
,
delta_angle
=
0
,
delta_xy
=
0
,
relight
=
False
,
relight_fast
=
False
,
structured
=
False
)
args
.
summary
.
test_iters
=
250
args
.
control
.
only_eval_when_done
=
True
args
.
control
.
reset_rng_seed
=
True
args
.
control
.
test_mode
=
'test'
else
:
logging
.
fatal
(
'Unknown mode: %s.'
,
mode
)
assert
(
False
)
return
args
def
get_solver_vars
(
solver_str
):
if
solver_str
==
''
:
vals
=
[];
else
:
vals
=
solver_str
.
split
(
'_'
)
ks
=
[
'clip'
,
'dlw'
,
'long'
,
'typ'
,
'isdk'
,
'adam_eps'
,
'init_lr'
];
ks
=
ks
[:
len
(
vals
)]
# Gradient clipping or not.
if
len
(
vals
)
==
0
:
ks
.
append
(
'clip'
);
vals
.
append
(
'noclip'
);
# data loss weight.
if
len
(
vals
)
==
1
:
ks
.
append
(
'dlw'
);
vals
.
append
(
'dlw20'
)
# how long to train for.
if
len
(
vals
)
==
2
:
ks
.
append
(
'long'
);
vals
.
append
(
'nolong'
)
# Adam
if
len
(
vals
)
==
3
:
ks
.
append
(
'typ'
);
vals
.
append
(
'adam2'
)
# reg loss wt
if
len
(
vals
)
==
4
:
ks
.
append
(
'rlw'
);
vals
.
append
(
'rlw1'
)
# isd_k
if
len
(
vals
)
==
5
:
ks
.
append
(
'isdk'
);
vals
.
append
(
'isdk415'
)
# 415, inflexion at 2.5k.
# adam eps
if
len
(
vals
)
==
6
:
ks
.
append
(
'adam_eps'
);
vals
.
append
(
'aeps1en8'
)
# init lr
if
len
(
vals
)
==
7
:
ks
.
append
(
'init_lr'
);
vals
.
append
(
'lr1en3'
)
assert
(
len
(
vals
)
==
8
)
vars
=
utils
.
Foo
()
for
k
,
v
in
zip
(
ks
,
vals
):
setattr
(
vars
,
k
,
v
)
logging
.
error
(
'solver_vars: %s'
,
vars
)
return
vars
def process_solver_str(solver_str):
  solver = utils.Foo(
      seed=0, learning_rate_decay=None, clip_gradient_norm=None,
      max_steps=None, initial_learning_rate=None, momentum=None,
      steps_per_decay=None, logdir=None, sync=False, adjust_lr_sync=True,
      wt_decay=0.0001, data_loss_wt=None, reg_loss_wt=None, freeze_conv=True,
      num_workers=1, task=0, ps_tasks=0, master='local', typ=None,
      momentum2=None, adam_eps=None)

  # Clobber with overrides from solver str.
  solver_vars = get_solver_vars(solver_str)
  solver.data_loss_wt = float(solver_vars.dlw[3:].replace('x', '.'))
  solver.adam_eps = float(
      solver_vars.adam_eps[4:].replace('x', '.').replace('n', '-'))
  solver.initial_learning_rate = float(
      solver_vars.init_lr[2:].replace('x', '.').replace('n', '-'))
  solver.reg_loss_wt = float(solver_vars.rlw[3:].replace('x', '.'))
  solver.isd_k = float(solver_vars.isdk[4:].replace('x', '.'))

  long = solver_vars.long
  if long == 'long':
    solver.steps_per_decay = 40000
    solver.max_steps = 120000
  elif long == 'long2':
    solver.steps_per_decay = 80000
    solver.max_steps = 120000
  elif long == 'nolong' or long == 'nol':
    solver.steps_per_decay = 20000
    solver.max_steps = 60000
  else:
    logging.fatal('solver_vars.long should be long, long2, nolong or nol.')
    assert(False)

  clip = solver_vars.clip
  if clip == 'noclip' or clip == 'nocl':
    solver.clip_gradient_norm = 0
  elif clip[:4] == 'clip':
    solver.clip_gradient_norm = float(clip[4:].replace('x', '.'))
  else:
    logging.fatal('Unknown solver_vars.clip: %s', clip)
    assert(False)

  typ = solver_vars.typ
  if typ == 'adam':
    solver.typ = 'adam'
    solver.momentum = 0.9
    solver.momentum2 = 0.999
    solver.learning_rate_decay = 1.0
  elif typ == 'adam2':
    solver.typ = 'adam'
    solver.momentum = 0.9
    solver.momentum2 = 0.999
    solver.learning_rate_decay = 0.1
  elif typ == 'sgd':
    solver.typ = 'sgd'
    solver.momentum = 0.99
    solver.momentum2 = None
    solver.learning_rate_decay = 0.1
  else:
    logging.fatal('Unknown solver_vars.typ: %s', typ)
    assert(False)

  logging.error('solver: %s', solver)
  return solver
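
# Illustrative usage, not part of the original file: the encoded numbers use
# 'x' for the decimal point and 'n' for the minus sign, so 'aeps1en8' decodes
# to 1e-8 and 'lr1en3' to 1e-3.
def _example_process_solver_str():
  solver = process_solver_str('clip2')
  # solver.clip_gradient_norm == 2.0, solver.data_loss_wt == 20.0,
  # solver.reg_loss_wt == 1.0, solver.isd_k == 415.0,
  # solver.adam_eps == 1e-8, solver.initial_learning_rate == 1e-3,
  # solver.steps_per_decay == 20000, solver.max_steps == 60000 ('nolong')
  return solver
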
def get_navtask_vars(navtask_str):
  if navtask_str == '':
    vals = []
  else:
    vals = navtask_str.split('_')

  ks_all = ['dataset_name', 'modality', 'task', 'history', 'max_dist',
            'num_steps', 'step_size', 'n_ori', 'aux_views', 'data_aug']
  ks = ks_all[:len(vals)]

  # All data or not.
  if len(vals) == 0: ks.append('dataset_name'); vals.append('sbpd')
  # modality
  if len(vals) == 1: ks.append('modality'); vals.append('rgb')
  # semantic task?
  if len(vals) == 2: ks.append('task'); vals.append('r2r')
  # number of history frames.
  if len(vals) == 3: ks.append('history'); vals.append('h0')
  # max steps
  if len(vals) == 4: ks.append('max_dist'); vals.append('32')
  # num steps
  if len(vals) == 5: ks.append('num_steps'); vals.append('40')
  # step size
  if len(vals) == 6: ks.append('step_size'); vals.append('8')
  # n_ori
  if len(vals) == 7: ks.append('n_ori'); vals.append('4')
  # Auxiliary views.
  if len(vals) == 8: ks.append('aux_views'); vals.append('nv0')
  # Normal data augmentation as opposed to structured data augmentation (if
  # set to 'straug').
  if len(vals) == 9: ks.append('data_aug'); vals.append('straug')
  assert(len(vals) == 10)

  for i in range(len(ks)):
    assert(ks[i] == ks_all[i])

  vars = utils.Foo()
  for k, v in zip(ks, vals):
    setattr(vars, k, v)
  logging.error('navtask_vars: %s', vals)
  return vars
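
# Illustrative usage, not part of the original file: 'sbpd_d_r2r' pins the
# first three keys, and everything after 'task' falls back to the defaults.
def _example_get_navtask_vars():
  nv = get_navtask_vars('sbpd_d_r2r')
  # nv.dataset_name == 'sbpd', nv.modality == 'd', nv.task == 'r2r',
  # nv.history == 'h0', nv.max_dist == '32', nv.num_steps == '40',
  # nv.step_size == '8', nv.n_ori == '4', nv.aux_views == 'nv0',
  # nv.data_aug == 'straug'
  return nv
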
def process_navtask_str(navtask_str):
  navtask = nec.nav_env_base_config()

  # Clobber with overrides from strings.
  navtask_vars = get_navtask_vars(navtask_str)
  navtask.task_params.n_ori = int(navtask_vars.n_ori)
  navtask.task_params.max_dist = int(navtask_vars.max_dist)
  navtask.task_params.num_steps = int(navtask_vars.num_steps)
  navtask.task_params.step_size = int(navtask_vars.step_size)
  navtask.task_params.data_augment.delta_xy = int(navtask_vars.step_size)/2.
  n_aux_views_each = int(navtask_vars.aux_views[2])
  aux_delta_thetas = np.concatenate((np.arange(n_aux_views_each) + 1,
                                     -1 - np.arange(n_aux_views_each)))
  aux_delta_thetas = aux_delta_thetas * np.deg2rad(navtask.camera_param.fov)
  navtask.task_params.aux_delta_thetas = aux_delta_thetas

  if navtask_vars.data_aug == 'aug':
    navtask.task_params.data_augment.structured = False
  elif navtask_vars.data_aug == 'straug':
    navtask.task_params.data_augment.structured = True
  else:
    logging.fatal('Unknown navtask_vars.data_aug %s.', navtask_vars.data_aug)
    assert(False)

  navtask.task_params.num_history_frames = int(navtask_vars.history[1:])
  navtask.task_params.n_views = 1 + navtask.task_params.num_history_frames
  navtask.task_params.goal_channels = int(navtask_vars.n_ori)

  if navtask_vars.task == 'hard':
    navtask.task_params.type = 'rng_rejection_sampling_many'
    navtask.task_params.rejection_sampling_M = 2000
    navtask.task_params.min_dist = 10
  elif navtask_vars.task == 'r2r':
    navtask.task_params.type = 'room_to_room_many'
  elif navtask_vars.task == 'ST':
    # Semantic task at hand.
    navtask.task_params.goal_channels = \
        len(navtask.task_params.semantic_task.class_map_names)
    navtask.task_params.rel_goal_loc_dim = \
        len(navtask.task_params.semantic_task.class_map_names)
    navtask.task_params.type = 'to_nearest_obj_acc'
  else:
    logging.fatal('navtask_vars.task: should be hard, r2r or ST.')
    assert(False)

  if navtask_vars.modality == 'rgb':
    navtask.camera_param.modalities = ['rgb']
    navtask.camera_param.img_channels = 3
  elif navtask_vars.modality == 'd':
    navtask.camera_param.modalities = ['depth']
    navtask.camera_param.img_channels = 2

  navtask.task_params.img_height = navtask.camera_param.height
  navtask.task_params.img_width = navtask.camera_param.width
  navtask.task_params.modalities = navtask.camera_param.modalities
  navtask.task_params.img_channels = navtask.camera_param.img_channels
  navtask.task_params.img_fov = navtask.camera_param.fov

  navtask.dataset = factory.get_dataset(navtask_vars.dataset_name)
  return navtask
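
# Illustrative usage, not part of the original file: the same string run
# through the full pipeline yields a room-to-room task on depth images
# (assuming the defaults in nec.nav_env_base_config()).
def _example_process_navtask_str():
  navtask = process_navtask_str('sbpd_d_r2r')
  # navtask.task_params.type == 'room_to_room_many'
  # navtask.camera_param.modalities == ['depth']
  # navtask.task_params.max_dist == 32, navtask.task_params.num_steps == 40
  return navtask
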
cognitive_mapping_and_planning/cfgs/config_distill.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pprint
import copy
import os
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import logging
import src.utils as utils
import cfgs.config_common as cc

import tensorflow as tf

rgb_resnet_v2_50_path = 'cache/resnet_v2_50_inception_preprocessed/model.ckpt-5136169'

def get_default_args():
  robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120,
                    camera_elevation_degree=-15)

  camera_param = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0,
                           fov=60., modalities=['rgb', 'depth'])

  env = utils.Foo(padding=10, resolution=5, num_point_threshold=2,
                  valid_min=-10, valid_max=200, n_samples_per_face=200)

  data_augment = utils.Foo(lr_flip=0, delta_angle=1, delta_xy=4,
                           relight=False, relight_fast=False,
                           structured=False)

  task_params = utils.Foo(num_actions=4, step_size=4, num_steps=0,
                          batch_size=32, room_seed=0, base_class='Building',
                          task='mapping', n_ori=6, data_augment=data_augment,
                          output_transform_to_global_map=False,
                          output_canonical_map=False,
                          output_incremental_transform=False,
                          output_free_space=False,
                          move_type='shortest_path', toy_problem=0)

  buildinger_args = utils.Foo(
      building_names=['area1_gates_wingA_floor1_westpart'],
      env_class=None, robot=robot, task_params=task_params, env=env,
      camera_param=camera_param)

  solver_args = utils.Foo(seed=0, learning_rate_decay=0.1,
                          clip_gradient_norm=0, max_steps=120000,
                          initial_learning_rate=0.001, momentum=0.99,
                          steps_per_decay=40000, logdir=None, sync=False,
                          adjust_lr_sync=True, wt_decay=0.0001,
                          data_loss_wt=1.0, reg_loss_wt=1.0, num_workers=1,
                          task=0, ps_tasks=0, master='local')

  summary_args = utils.Foo(display_interval=1, test_iters=100)

  control_args = utils.Foo(train=False, test=False,
                           force_batchnorm_is_training_at_test=False)

  arch_args = utils.Foo(rgb_encoder='resnet_v2_50', d_encoder='resnet_v2_50')

  return utils.Foo(solver=solver_args, summary=summary_args,
                   control=control_args, arch=arch_args,
                   buildinger=buildinger_args)

def get_vars(config_name):
  vars = config_name.split('_')
  if len(vars) == 1:
    # All data or not.
    vars.append('noall')
  if len(vars) == 2:
    # n_ori
    vars.append('4')
  logging.error('vars: %s', vars)
  return vars

def get_args_for_config(config_name):
  args = get_default_args()
  config_name, mode = config_name.split('+')
  vars = get_vars(config_name)
  logging.info('config_name: %s, mode: %s', config_name, mode)

  args.buildinger.task_params.n_ori = int(vars[2])
  args.solver.freeze_conv = True
  args.solver.pretrained_path = rgb_resnet_v2_50_path
  args.buildinger.task_params.img_channels = 5
  args.solver.data_loss_wt = 0.00001

  if vars[0] == 'v0':
    pass
  else:
    logging.error('config_name: %s undefined', config_name)

  args.buildinger.task_params.height = args.buildinger.camera_param.height
  args.buildinger.task_params.width = args.buildinger.camera_param.width
  args.buildinger.task_params.modalities = \
      args.buildinger.camera_param.modalities

  if vars[1] == 'all':
    args = cc.get_args_for_mode_building_all(args, mode)
  elif vars[1] == 'noall':
    args = cc.get_args_for_mode_building(args, mode)

  # Log the arguments
  logging.error('%s', args)
  return args
cognitive_mapping_and_planning/cfgs/config_vision_baseline.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pprint
import os
import numpy as np
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import logging
import src.utils as utils
import cfgs.config_common as cc
import datasets.nav_env_config as nec

import tensorflow as tf

FLAGS = flags.FLAGS

get_solver_vars = cc.get_solver_vars
get_navtask_vars = cc.get_navtask_vars

rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169'
d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002'

def get_default_args():
  summary_args = utils.Foo(display_interval=1, test_iters=26,
                           arop_full_summary_iters=14)

  control_args = utils.Foo(train=False, test=False,
                           force_batchnorm_is_training_at_test=False,
                           reset_rng_seed=False, only_eval_when_done=False,
                           test_mode=None)
  return summary_args, control_args

def get_default_baseline_args():
  batch_norm_param = {'center': True, 'scale': True,
                      'activation_fn': tf.nn.relu}
  arch_args = utils.Foo(
      pred_neurons=[], goal_embed_neurons=[], img_embed_neurons=[],
      batch_norm_param=batch_norm_param, dim_reduce_neurons=64,
      combine_type='', encoder='resnet_v2_50', action_sample_type='sample',
      action_sample_combine_type='one_or_other',
      sample_gt_prob_type='inverse_sigmoid_decay',
      dagger_sample_bn_false=True, isd_k=750., use_visit_count=False,
      lstm_output=False, lstm_ego=False, lstm_img=False, fc_dropout=0.0,
      embed_goal_for_state=False, lstm_output_init_state_from_goal=False)
  return arch_args

def get_arch_vars(arch_str):
  if arch_str == '':
    vals = []
  else:
    vals = arch_str.split('_')

  ks = ['ver', 'lstm_dim', 'dropout']

  # Exp Ver
  if len(vals) == 0: vals.append('v0')
  # LSTM dimensions
  if len(vals) == 1: vals.append('lstm2048')
  # Dropout
  if len(vals) == 2: vals.append('noDO')
  assert(len(vals) == 3)

  vars = utils.Foo()
  for k, v in zip(ks, vals):
    setattr(vars, k, v)
  logging.error('arch_vars: %s', vars)
  return vars

def process_arch_str(args, arch_str):
  # This function modifies args.
  args.arch = get_default_baseline_args()
  arch_vars = get_arch_vars(arch_str)

  args.navtask.task_params.outputs.rel_goal_loc = True
  args.navtask.task_params.input_type = 'vision'
  args.navtask.task_params.outputs.images = True

  if args.navtask.camera_param.modalities[0] == 'rgb':
    args.solver.pretrained_path = rgb_resnet_v2_50_path
  elif args.navtask.camera_param.modalities[0] == 'depth':
    args.solver.pretrained_path = d_resnet_v2_50_path
  else:
    logging.fatal('Neither of rgb or d')

  if arch_vars.dropout == 'DO':
    args.arch.fc_dropout = 0.5

  args.tfcode = 'B'

  exp_ver = arch_vars.ver
  if exp_ver == 'v0':
    # Multiplicative interaction between goal loc and image features.
    args.arch.combine_type = 'multiply'
    args.arch.pred_neurons = [256, 256]
    args.arch.goal_embed_neurons = [64, 8]
    args.arch.img_embed_neurons = [1024, 512, 256*8]

  elif exp_ver == 'v1':
    # Additive interaction between goal and image features.
    args.arch.combine_type = 'add'
    args.arch.pred_neurons = [256, 256]
    args.arch.goal_embed_neurons = [64, 256]
    args.arch.img_embed_neurons = [1024, 512, 256]

  elif exp_ver == 'v2':
    # LSTM at the output on top of multiple interactions.
    args.arch.combine_type = 'multiply'
    args.arch.goal_embed_neurons = [64, 8]
    args.arch.img_embed_neurons = [1024, 512, 256*8]
    args.arch.lstm_output = True
    args.arch.lstm_output_dim = int(arch_vars.lstm_dim[4:])
    args.arch.pred_neurons = [256]  # The other is inside the LSTM.

  elif exp_ver == 'v0blind':
    # LSTM only on the goal location.
    args.arch.combine_type = 'goalonly'
    args.arch.goal_embed_neurons = [64, 256]
    args.arch.img_embed_neurons = [2]  # I don't know what it will do otherwise.
    args.arch.lstm_output = True
    args.arch.lstm_output_dim = 256
    args.arch.pred_neurons = [256]  # The other is inside the LSTM.

  else:
    logging.fatal('exp_ver: %s undefined', exp_ver)
    assert(False)

  # Log the arguments
  logging.error('%s', args)
  return args

def get_args_for_config(config_name):
  args = utils.Foo()
  args.summary, args.control = get_default_args()

  exp_name, mode_str = config_name.split('+')
  arch_str, solver_str, navtask_str = exp_name.split('.')
  logging.error('config_name: %s', config_name)
  logging.error('arch_str: %s', arch_str)
  logging.error('navtask_str: %s', navtask_str)
  logging.error('solver_str: %s', solver_str)
  logging.error('mode_str: %s', mode_str)

  args.solver = cc.process_solver_str(solver_str)
  args.navtask = cc.process_navtask_str(navtask_str)
  args = process_arch_str(args, arch_str)
  args.arch.isd_k = args.solver.isd_k

  # Train, test, etc.
  mode, imset = mode_str.split('_')
  args = cc.adjust_args_for_mode(args, mode)
  args.navtask.building_names = args.navtask.dataset.get_split(imset)
  args.control.test_name = '{:s}_on_{:s}'.format(mode, imset)

  # Log the arguments
  logging.error('%s', args)
  return args
cognitive_mapping_and_planning/data/.gitignore
stanford_building_parser_dataset_raw
stanford_building_parser_dataset
init_models
cognitive_mapping_and_planning/data/README.md
This directory contains the data needed for training and benchmarking various
navigation models.

1.  Download the data from the
    [dataset website](http://buildingparser.stanford.edu/dataset.html).
    1.  [Raw meshes](https://goo.gl/forms/2YSPaO2UKmn5Td5m2). We need the
        meshes which are in the noXYZ folder. Download the tar files and
        place them in the `stanford_building_parser_dataset_raw` folder. You
        need to download `area_1_noXYZ.tar`, `area_3_noXYZ.tar`,
        `area_5a_noXYZ.tar`, `area_5b_noXYZ.tar`, `area_6_noXYZ.tar` for
        training and `area_4_noXYZ.tar` for evaluation.
    2.  [Annotations](https://goo.gl/forms/4SoGp4KtH1jfRqEj2) for setting up
        tasks. We will need the file called `Stanford3dDataset_v1.2.zip`.
        Place the file in the directory
        `stanford_building_parser_dataset_raw`.

2.  Preprocess the data.
    1.  Extract meshes using `scripts/script_preprocess_meshes_S3DIS.sh`.
        After this, `ls data/stanford_building_parser_dataset/mesh` should
        show 6 folders `area1`, `area3`, `area4`, `area5a`, `area5b`,
        `area6`, with textures and obj files within each directory.
    2.  Extract room information and semantics from the zip file using
        `scripts/script_preprocess_annoations_S3DIS.sh`. After this there
        should be `room-dimension` and `class-maps` folders in
        `data/stanford_building_parser_dataset`. (If you find this script
        crashing because of an exception in np.loadtxt while processing
        `Area_5/office_19/Annotations/ceiling_1.txt`, there is a special
        character on line 323474 that should be removed manually.)

3.  Download ImageNet pre-trained models. We used ResNet-v2-50 for
    representing images. For RGB images this is pre-trained on ImageNet. For
    depth images we [distill](https://arxiv.org/abs/1507.00448) the RGB model
    to depth images using paired RGB-D images. Both these models are
    available through `scripts/script_download_init_models.sh`.
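
As a quick sanity check after preprocessing (a minimal sketch, not part of
the setup scripts; the paths simply follow the steps above):

```python
import os

def check_sbpd_layout(root='data/stanford_building_parser_dataset'):
  """Checks that meshes and annotations landed where the steps above put them."""
  for area in ['area1', 'area3', 'area4', 'area5a', 'area5b', 'area6']:
    assert os.path.isdir(os.path.join(root, 'mesh', area)), area
  for sub in ['room-dimension', 'class-maps']:
    assert os.path.isdir(os.path.join(root, sub)), sub
  print('Layout looks good.')

check_sbpd_layout()
```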
cognitive_mapping_and_planning/datasets/__init__.py
cognitive_mapping_and_planning/datasets/factory.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Wrapper for selecting the navigation environment that we want to train
and test on.
"""
import numpy as np
import os, glob
import platform
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags

import render.swiftshader_renderer as renderer
import src.file_utils as fu
import src.utils as utils

def get_dataset(dataset_name):
  if dataset_name == 'sbpd':
    dataset = StanfordBuildingParserDataset(dataset_name)
  else:
    logging.fatal('Not one of sbpd')
  return dataset

class Loader():
  def get_data_dir(self):
    pass

  def get_meta_data(self, file_name, data_dir=None):
    if data_dir is None:
      data_dir = self.get_data_dir()
    full_file_name = os.path.join(data_dir, 'meta', file_name)
    assert(fu.exists(full_file_name)), \
        '{:s} does not exist'.format(full_file_name)
    ext = os.path.splitext(full_file_name)[1]
    if ext == '.txt':
      ls = []
      with fu.fopen(full_file_name, 'r') as f:
        for l in f:
          ls.append(l.rstrip())
    elif ext == '.pkl':
      ls = utils.load_variables(full_file_name)
    return ls

  def load_building(self, name, data_dir=None):
    if data_dir is None:
      data_dir = self.get_data_dir()
    out = {}
    out['name'] = name
    out['data_dir'] = data_dir
    out['room_dimension_file'] = os.path.join(data_dir, 'room-dimension',
                                              name+'.pkl')
    out['class_map_folder'] = os.path.join(data_dir, 'class-maps')
    return out

  def load_building_meshes(self, building):
    dir_name = os.path.join(building['data_dir'], 'mesh', building['name'])
    mesh_file_name = glob.glob1(dir_name, '*.obj')[0]
    mesh_file_name_full = os.path.join(dir_name, mesh_file_name)
    logging.error('Loading building from obj file: %s', mesh_file_name_full)
    shape = renderer.Shape(mesh_file_name_full, load_materials=True,
                           name_prefix=building['name']+'_')
    return [shape]

class StanfordBuildingParserDataset(Loader):
  def __init__(self, ver):
    self.ver = ver
    self.data_dir = None

  def get_data_dir(self):
    if self.data_dir is None:
      self.data_dir = 'data/stanford_building_parser_dataset/'
    return self.data_dir

  def get_benchmark_sets(self):
    return self._get_benchmark_sets()

  def get_split(self, split_name):
    if self.ver == 'sbpd':
      return self._get_split(split_name)
    else:
      logging.fatal('Unknown version.')

  def _get_benchmark_sets(self):
    sets = ['train1', 'val', 'test']
    return sets

  def _get_split(self, split_name):
    train = ['area1', 'area5a', 'area5b', 'area6']
    train1 = ['area1']
    val = ['area3']
    test = ['area4']
    sets = {}
    sets['train'] = train
    sets['train1'] = train1
    sets['val'] = val
    sets['test'] = test
    sets['all'] = sorted(list(set(train + val + test)))
    return sets[split_name]
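
# Illustrative usage, not part of the original file: the splits mirror the
# download instructions in data/README.md (areas 1, 5a, 5b and 6 for
# training, area 3 for validation and area 4 for testing).
def _example_splits():
  dataset = get_dataset('sbpd')
  train = dataset.get_split('train')  # ['area1', 'area5a', 'area5b', 'area6']
  val = dataset.get_split('val')      # ['area3']
  test = dataset.get_split('test')    # ['area4']
  return train, val, test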
cognitive_mapping_and_planning/datasets/nav_env.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Navigation Environment. Includes the following classes along with some
helper functions.
  Building: Loads buildings, computes traversibility, exposes functionality
    for rendering images.
  GridWorld: Base class which implements functionality for moving an agent
    on a grid world.
  NavigationEnv: Base class which generates navigation problems on a grid
    world.
  VisualNavigationEnv: Builds upon NavigationEnv and Building to provide
    interface that is used externally to train the agent.
  MeshMapper: Class used for distilling the model, testing the mapper.
  BuildingMultiplexer: Wrapper class that instantiates a VisualNavigationEnv
    for each building and multiplexes between them as needed.
"""
import numpy as np
import os
import re
import matplotlib.pyplot as plt

import graph_tool as gt
import graph_tool.topology

from tensorflow.python.platform import gfile
import logging
import src.file_utils as fu
import src.utils as utils
import src.graph_utils as gu
import src.map_utils as mu
import src.depth_utils as du
import render.swiftshader_renderer as sru
from render.swiftshader_renderer import SwiftshaderRenderer
import cv2

label_nodes_with_class = gu.label_nodes_with_class
label_nodes_with_class_geodesic = gu.label_nodes_with_class_geodesic
get_distance_node_list = gu.get_distance_node_list
convert_to_graph_tool = gu.convert_to_graph_tool
generate_graph = gu.generate_graph
get_hardness_distribution = gu.get_hardness_distribution
rng_next_goal_rejection_sampling = gu.rng_next_goal_rejection_sampling
rng_next_goal = gu.rng_next_goal
rng_room_to_room = gu.rng_room_to_room
rng_target_dist_field = gu.rng_target_dist_field

compute_traversibility = mu.compute_traversibility
make_map = mu.make_map
resize_maps = mu.resize_maps
pick_largest_cc = mu.pick_largest_cc
get_graph_origin_loc = mu.get_graph_origin_loc
generate_egocentric_maps = mu.generate_egocentric_maps
generate_goal_images = mu.generate_goal_images
get_map_to_predict = mu.get_map_to_predict

bin_points = du.bin_points
make_geocentric = du.make_geocentric
get_point_cloud_from_z = du.get_point_cloud_from_z
get_camera_matrix = du.get_camera_matrix

def _get_semantic_maps(folder_name, building_name, map, flip):
  # Load file from the cache.
  file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl'
  file_name = file_name.format(building_name, map.size[0], map.size[1],
                               map.origin[0], map.origin[1], map.resolution,
                               flip)
  file_name = os.path.join(folder_name, file_name)
  logging.info('Loading semantic maps from %s.', file_name)

  if fu.exists(file_name):
    a = utils.load_variables(file_name)
    maps = a['maps']  # HxWx#C
    cats = a['cats']
  else:
    logging.error('file_name: %s not found.', file_name)
    maps = None
    cats = None
  return maps, cats

def _select_classes(all_maps, all_cats, cats_to_use):
  inds = []
  for c in cats_to_use:
    ind = all_cats.index(c)
    inds.append(ind)
  out_maps = all_maps[:, :, inds]
  return out_maps

def _get_room_dimensions(file_name, resolution, origin, flip=False):
  if fu.exists(file_name):
    a = utils.load_variables(file_name)['room_dimension']
    names = a.keys()
    dims = np.concatenate(a.values(), axis=0).reshape((-1, 6))
    ind = np.argsort(names)
    dims = dims[ind, :]
    names = [names[x] for x in ind]
    if flip:
      dims_new = dims * 1
      dims_new[:, 1] = -dims[:, 4]
      dims_new[:, 4] = -dims[:, 1]
      dims = dims_new * 1

    dims = dims * 100.
    dims[:, 0] = dims[:, 0] - origin[0]
    dims[:, 1] = dims[:, 1] - origin[1]
    dims[:, 3] = dims[:, 3] - origin[0]
    dims[:, 4] = dims[:, 4] - origin[1]
    dims = dims / resolution
    out = {'names': names, 'dims': dims}
  else:
    out = None
  return out

def _filter_rooms(room_dims, room_regex):
  pattern = re.compile(room_regex)
  ind = []
  for i, name in enumerate(room_dims['names']):
    if pattern.match(name):
      ind.append(i)
  new_room_dims = {}
  new_room_dims['names'] = [room_dims['names'][i] for i in ind]
  new_room_dims['dims'] = room_dims['dims'][ind, :] * 1
  return new_room_dims

def _label_nodes_with_room_id(xyt, room_dims):
  # Label each node with the ID of the room it falls in.
  node_room_id = -1 * np.ones((xyt.shape[0], 1))
  dims = room_dims['dims']
  for x, name in enumerate(room_dims['names']):
    all_ = np.concatenate((xyt[:, [0]] >= dims[x, 0],
                           xyt[:, [0]] <= dims[x, 3],
                           xyt[:, [1]] >= dims[x, 1],
                           xyt[:, [1]] <= dims[x, 4]), axis=1)
    node_room_id[np.all(all_, axis=1), 0] = x
  return node_room_id
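
# Illustrative usage, not part of the original file: dims rows are
# [xmin, ymin, zmin, xmax, ymax, zmax]; nodes outside every room keep id -1.
def _example_label_nodes_with_room_id():
  xyt = np.array([[1., 1., 0.], [9., 9., 0.]])
  room_dims = {'names': ['office_1'],
               'dims': np.array([[0., 0., 0., 5., 5., 3.]])}
  return _label_nodes_with_room_id(xyt, room_dims)  # [[0.], [-1.]]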

def get_path_ids(start_node_id, end_node_id, pred_map):
  id = start_node_id
  path = [id]
  while id != end_node_id:
    id = pred_map[id]
    path.append(id)
  return path
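
# Illustrative usage, not part of the original file: pred_map maps a node to
# the next node on the way to end_node_id, so following it from the start
# node traces out the full path.
def _example_get_path_ids():
  pred_map = {2: 1, 1: 0, 0: 0}
  return get_path_ids(2, 0, pred_map)  # [2, 1, 0]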

def image_pre(images, modalities):
  # Assumes images are ...xHxWxC.
  # We always assume images are RGB followed by Depth.
  if 'depth' in modalities:
    d = images[..., -1][..., np.newaxis] * 1.
    d[d < 0.01] = np.NaN
    isnan = np.isnan(d)
    d = 100. / d
    d[isnan] = 0.
    images = np.concatenate((images[..., :-1], d, isnan),
                            axis=images.ndim - 1)
  if 'rgb' in modalities:
    images[..., :3] = images[..., :3] * 1. - 128
  return images
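
# Illustrative usage, not part of the original file: depth becomes inverse
# depth (100/d) with invalid pixels (< 0.01) zeroed, a validity-mask channel
# is appended, and RGB is mean-shifted by -128.
def _example_image_pre():
  img = np.zeros((2, 2, 4))
  img[..., :3] = 128.  # mid-gray RGB
  img[..., 3] = 50.    # constant depth
  out = image_pre(img * 1., ['rgb', 'depth'])
  # out[..., :3] == 0., out[..., 3] == 2. (100/50), out[..., 4] == 0.
  return out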

def _get_relative_goal_loc(goal_loc, loc, theta):
  r = np.sqrt(np.sum(np.square(goal_loc - loc), axis=1))
  t = np.arctan2(goal_loc[:, 1] - loc[:, 1], goal_loc[:, 0] - loc[:, 0])
  t = t - theta[:, 0] + np.pi / 2
  return np.expand_dims(r, axis=1), np.expand_dims(t, axis=1)
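
# Illustrative usage, not part of the original file: note the +pi/2 offset in
# the convention above, so a goal along theta = 0 comes out at t = pi/2.
def _example_relative_goal_loc():
  goal_loc = np.array([[1., 0.]])
  loc = np.array([[0., 0.]])
  theta = np.array([[0.]])
  r, t = _get_relative_goal_loc(goal_loc, loc, theta)
  # r == [[1.]], t == [[np.pi / 2]]
  return r, t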

def _gen_perturbs(rng, batch_size, num_steps, lr_flip, delta_angle, delta_xy,
                  structured):
  perturbs = []
  for i in range(batch_size):
    # Doing things one by one for each episode in this batch. This way this
    # remains replicatable even when we change the batch size.
    p = np.zeros((num_steps + 1, 4))
    if lr_flip:
      # Flip the whole trajectory.
      p[:, 3] = rng.rand(1) - 0.5
    if delta_angle > 0:
      if structured:
        p[:, 2] = (rng.rand(1) - 0.5) * delta_angle
      else:
        p[:, 2] = (rng.rand(p.shape[0]) - 0.5) * delta_angle
    if delta_xy > 0:
      if structured:
        p[:, :2] = (rng.rand(1, 2) - 0.5) * delta_xy
      else:
        p[:, :2] = (rng.rand(p.shape[0], 2) - 0.5) * delta_xy
    perturbs.append(p)
  return perturbs
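
# Illustrative usage, not part of the original file: with structured=True a
# single perturbation is drawn per trajectory (rows of each (num_steps+1) x 4
# array are identical); with structured=False each step gets its own draw.
def _example_gen_perturbs():
  rng = np.random.RandomState(0)
  perturbs = _gen_perturbs(rng, batch_size=2, num_steps=3, lr_flip=1,
                           delta_angle=1, delta_xy=4, structured=True)
  # len(perturbs) == 2; each element has shape (4, 4).
  return perturbs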

def get_multiplexer_class(args, task_number):
  assert(args.task_params.base_class == 'Building')
  logging.info('Returning BuildingMultiplexer')
  R = BuildingMultiplexer(args, task_number)
  return R

class GridWorld():
  def __init__(self):
    """Class members that will be assigned by any class that actually uses
    this class."""
    self.restrict_to_largest_cc = None
    self.robot = None
    self.env = None
    self.category_list = None
    self.traversible = None

  def get_loc_axis(self, node, delta_theta, perturb=None):
    """Based on the node orientation returns X, and Y axis. Used to sample
    the map in egocentric coordinate frame.
    """
    if type(node) == tuple:
      node = np.array([node])
    if perturb is None:
      perturb = np.zeros((node.shape[0], 4))
    xyt = self.to_actual_xyt_vec(node)
    x = xyt[:, [0]] + perturb[:, [0]]
    y = xyt[:, [1]] + perturb[:, [1]]
    t = xyt[:, [2]] + perturb[:, [2]]
    theta = t * delta_theta
    loc = np.concatenate((x, y), axis=1)
    x_axis = np.concatenate((np.cos(theta), np.sin(theta)), axis=1)
    y_axis = np.concatenate((np.cos(theta + np.pi / 2.),
                             np.sin(theta + np.pi / 2.)), axis=1)
    # Flip the sampled map where need be.
    y_axis[np.where(perturb[:, 3] > 0)[0], :] *= -1.
    return loc, x_axis, y_axis, theta

  def to_actual_xyt(self, pqr):
    """Converts from node to location on the map."""
    (p, q, r) = pqr
    if self.task.n_ori == 6:
      out = (p - q * 0.5 + self.task.origin_loc[0],
             q * np.sqrt(3.) / 2. + self.task.origin_loc[1], r)
    elif self.task.n_ori == 4:
      out = (p + self.task.origin_loc[0], q + self.task.origin_loc[1], r)
    return out

  def to_actual_xyt_vec(self, pqr):
    """Converts from node array to location array on the map."""
    p = pqr[:, 0][:, np.newaxis]
    q = pqr[:, 1][:, np.newaxis]
    r = pqr[:, 2][:, np.newaxis]
    if self.task.n_ori == 6:
      out = np.concatenate(
          (p - q * 0.5 + self.task.origin_loc[0],
           q * np.sqrt(3.) / 2. + self.task.origin_loc[1], r), axis=1)
    elif self.task.n_ori == 4:
      out = np.concatenate(
          (p + self.task.origin_loc[0], q + self.task.origin_loc[1], r),
          axis=1)
    return out

  def raw_valid_fn_vec(self, xyt):
    """Returns if the given set of locations is valid or not."""
    height = self.traversible.shape[0]
    width = self.traversible.shape[1]
    x = np.round(xyt[:, [0]]).astype(np.int32)
    y = np.round(xyt[:, [1]]).astype(np.int32)
    is_inside = np.all(np.concatenate((x >= 0, y >= 0,
                                       x < width, y < height), axis=1),
                       axis=1)
    x = np.minimum(np.maximum(x, 0), width - 1)
    y = np.minimum(np.maximum(y, 0), height - 1)
    ind = np.ravel_multi_index((y, x), self.traversible.shape)
    is_traversible = self.traversible.ravel()[ind]
    is_valid = np.all(np.concatenate((is_inside[:, np.newaxis],
                                      is_traversible), axis=1), axis=1)
    return is_valid

  def valid_fn_vec(self, pqr):
    """Returns if the given set of nodes is valid or not."""
    xyt = self.to_actual_xyt_vec(np.array(pqr))
    height = self.traversible.shape[0]
    width = self.traversible.shape[1]
    x = np.round(xyt[:, [0]]).astype(np.int32)
    y = np.round(xyt[:, [1]]).astype(np.int32)
    is_inside = np.all(np.concatenate((x >= 0, y >= 0,
                                       x < width, y < height), axis=1),
                       axis=1)
    x = np.minimum(np.maximum(x, 0), width - 1)
    y = np.minimum(np.maximum(y, 0), height - 1)
    ind = np.ravel_multi_index((y, x), self.traversible.shape)
    is_traversible = self.traversible.ravel()[ind]
    is_valid = np.all(np.concatenate((is_inside[:, np.newaxis],
                                      is_traversible), axis=1), axis=1)
    return is_valid

  def get_feasible_actions(self, node_ids):
    """Returns the feasible set of actions from the current node."""
    a = np.zeros((len(node_ids), self.task_params.num_actions),
                 dtype=np.int32)
    gtG = self.task.gtG
    next_node = []
    for i, c in enumerate(node_ids):
      neigh = gtG.vertex(c).out_neighbours()
      neigh_edge = gtG.vertex(c).out_edges()
      nn = {}
      for n, e in zip(neigh, neigh_edge):
        _ = gtG.ep['action'][e]
        a[i, _] = 1
        nn[_] = int(n)
      next_node.append(nn)
    return a, next_node

  def take_action(self, current_node_ids, action):
    """Returns the new node after taking the given action. Stays at the
    current node if the action is invalid."""
    actions, next_node_ids = self.get_feasible_actions(current_node_ids)
    new_node_ids = []
    for i, (c, a) in enumerate(zip(current_node_ids, action)):
      if actions[i, a] == 1:
        new_node_ids.append(next_node_ids[i][a])
      else:
        new_node_ids.append(c)
    return new_node_ids

  def set_r_obj(self, r_obj):
    """Sets the SwiftshaderRenderer object used for rendering."""
    self.r_obj = r_obj

class Building(GridWorld):
  def __init__(self, building_name, robot, env, category_list=None,
               small=False, flip=False, logdir=None, building_loader=None):
    self.restrict_to_largest_cc = True
    self.robot = robot
    self.env = env
    self.logdir = logdir

    # Load the building meta data.
    building = building_loader.load_building(building_name)
    if small:
      building['mesh_names'] = building['mesh_names'][:5]

    # New code.
    shapess = building_loader.load_building_meshes(building)
    if flip:
      for shapes in shapess:
        shapes.flip_shape()

    vs = []
    for shapes in shapess:
      vs.append(shapes.get_vertices()[0])
    vs = np.concatenate(vs, axis=0)
    map = make_map(env.padding, env.resolution, vertex=vs, sc=100.)
    map = compute_traversibility(
        map, robot.base, robot.height, robot.radius, env.valid_min,
        env.valid_max, env.num_point_threshold, shapess=shapess, sc=100.,
        n_samples_per_face=env.n_samples_per_face)

    room_dims = _get_room_dimensions(building['room_dimension_file'],
                                     env.resolution, map.origin, flip=flip)
    class_maps, class_map_names = _get_semantic_maps(
        building['class_map_folder'], building_name, map, flip)

    self.class_maps = class_maps
    self.class_map_names = class_map_names
    self.building = building
    self.shapess = shapess
    self.map = map
    self.traversible = map.traversible * 1
    self.building_name = building_name
    self.room_dims = room_dims
    self.flipped = flip
    self.renderer_entitiy_ids = []
    if self.restrict_to_largest_cc:
      self.traversible = pick_largest_cc(self.traversible)

  def load_building_into_scene(self):
    # Loads the scene.
    self.renderer_entitiy_ids += self.r_obj.load_shapes(self.shapess)
    # Free up memory, we don't need the mesh or the materials anymore.
    self.shapess = None

  def add_entity_at_nodes(self, nodes, height, shape):
    xyt = self.to_actual_xyt_vec(nodes)
    nxy = xyt[:, :2] * 1.
    nxy = nxy * self.map.resolution
    nxy = nxy + self.map.origin
    Ts = np.concatenate((nxy, nxy[:, :1]), axis=1)
    Ts[:, 2] = height
    Ts = Ts / 100.

    # Merge all the shapes into a single shape and add that shape.
    shape.replicate_shape(Ts)
    entity_ids = self.r_obj.load_shapes([shape])
    self.renderer_entitiy_ids += entity_ids
    return entity_ids

  def add_shapes(self, shapes):
    scene = self.r_obj.viz.scene()
    for shape in shapes:
      scene.AddShape(shape)

  def add_materials(self, materials):
    scene = self.r_obj.viz.scene()
    for material in materials:
      scene.AddOrUpdateMaterial(material)

  def set_building_visibility(self, visibility):
    self.r_obj.set_entity_visible(self.renderer_entitiy_ids, visibility)

  def render_nodes(self, nodes, perturb=None, aux_delta_theta=0.):
    self.set_building_visibility(True)
    if perturb is None:
      perturb = np.zeros((len(nodes), 4))

    imgs = []
    r = 2
    elevation_z = r * np.tan(np.deg2rad(self.robot.camera_elevation_degree))

    for i in range(len(nodes)):
      xyt = self.to_actual_xyt(nodes[i])
      lookat_theta = 3.0 * np.pi / 2.0 - (xyt[2] + perturb[i, 2] +
                                          aux_delta_theta) * (self.task.delta_theta)
      nxy = np.array([xyt[0] + perturb[i, 0],
                      xyt[1] + perturb[i, 1]]).reshape(1, -1)
      nxy = nxy * self.map.resolution
      nxy = nxy + self.map.origin
      camera_xyz = np.zeros((1, 3))
      camera_xyz[...] = [nxy[0, 0], nxy[0, 1], self.robot.sensor_height]
      camera_xyz = camera_xyz / 100.
      lookat_xyz = np.array([-r * np.sin(lookat_theta),
                             -r * np.cos(lookat_theta), elevation_z])
      lookat_xyz = lookat_xyz + camera_xyz[0, :]
      self.r_obj.position_camera(camera_xyz[0, :].tolist(),
                                 lookat_xyz.tolist(), [0.0, 0.0, 1.0])
      img = self.r_obj.render(take_screenshot=True, output_type=0)
      img = [x for x in img if x is not None]
      img = np.concatenate(img, axis=2).astype(np.float32)
      if perturb[i, 3] > 0:
        img = img[:, ::-1, :]
      imgs.append(img)

    self.set_building_visibility(False)
    return imgs

class MeshMapper(Building):
  def __init__(self, robot, env, task_params, building_name, category_list,
               flip, logdir=None, building_loader=None):
    Building.__init__(self, building_name, robot, env, category_list,
                      small=task_params.toy_problem, flip=flip, logdir=logdir,
                      building_loader=building_loader)
    self.task_params = task_params
    self.task = None
    self._preprocess_for_task(self.task_params.building_seed)

  def _preprocess_for_task(self, seed):
    if self.task is None or self.task.seed != seed:
      rng = np.random.RandomState(seed)
      origin_loc = get_graph_origin_loc(rng, self.traversible)
      self.task = utils.Foo(seed=seed, origin_loc=origin_loc,
                            n_ori=self.task_params.n_ori)
      G = generate_graph(self.valid_fn_vec, self.task_params.step_size,
                         self.task.n_ori, (0, 0, 0))
      gtG, nodes, nodes_to_id = convert_to_graph_tool(G)
      self.task.gtG = gtG
      self.task.nodes = nodes
      self.task.delta_theta = 2.0 * np.pi / (self.task.n_ori * 1.)
      self.task.nodes_to_id = nodes_to_id
      logging.info('Building %s, #V=%d, #E=%d', self.building_name,
                   self.task.nodes.shape[0], self.task.gtG.num_edges())

      if self.logdir is not None:
        write_traversible = cv2.applyColorMap(
            self.traversible.astype(np.uint8) * 255, cv2.COLORMAP_JET)
        img_path = os.path.join(
            self.logdir,
            '{:s}_{:d}_graph.png'.format(self.building_name, seed))
        node_xyt = self.to_actual_xyt_vec(self.task.nodes)
        plt.set_cmap('jet')
        fig, ax = utils.subplot(plt, (1, 1), (12, 12))
        ax.plot(node_xyt[:, 0], node_xyt[:, 1], 'm.')
        ax.imshow(self.traversible, origin='lower')
        ax.set_axis_off()
        ax.axis('equal')
        ax.set_title('{:s}, {:d}, {:d}'.format(self.building_name,
                                               self.task.nodes.shape[0],
                                               self.task.gtG.num_edges()))
        if self.room_dims is not None:
          for i, r in enumerate(self.room_dims['dims'] * 1):
            min_ = r[:3] * 1
            max_ = r[3:] * 1
            xmin, ymin, zmin = min_
            xmax, ymax, zmax = max_
            ax.plot([xmin, xmax, xmax, xmin, xmin],
                    [ymin, ymin, ymax, ymax, ymin], 'g')
        with fu.fopen(img_path, 'w') as f:
          fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
        plt.close(fig)

  def _gen_rng(self, rng):
    # instances is a list of list of node_ids.
    if self.task_params.move_type == 'circle':
      _, _, _, _, paths = rng_target_dist_field(
          self.task_params.batch_size, self.task.gtG, rng, 0, 1,
          compute_path=True)
      instances_ = paths

      instances = []
      for instance_ in instances_:
        instance = instance_
        for i in range(self.task_params.num_steps):
          instance.append(self.take_action([instance[-1]], [1])[0])
        instances.append(instance)

    elif self.task_params.move_type == 'shortest_path':
      _, _, _, _, paths = rng_target_dist_field(
          self.task_params.batch_size, self.task.gtG, rng,
          self.task_params.num_steps, self.task_params.num_steps + 1,
          compute_path=True)
      instances = paths

    elif self.task_params.move_type == 'circle+forward':
      _, _, _, _, paths = rng_target_dist_field(
          self.task_params.batch_size, self.task.gtG, rng, 0, 1,
          compute_path=True)
      instances_ = paths

      instances = []
      for instance_ in instances_:
        instance = instance_
        for i in range(self.task_params.n_ori - 1):
          instance.append(self.take_action([instance[-1]], [1])[0])
        while len(instance) <= self.task_params.num_steps:
          while (self.take_action([instance[-1]], [3])[0] == instance[-1] and
                 len(instance) <= self.task_params.num_steps):
            instance.append(self.take_action([instance[-1]], [2])[0])
          if len(instance) <= self.task_params.num_steps:
            instance.append(self.take_action([instance[-1]], [3])[0])
        instances.append(instance)

    # Do random perturbation if needed.
    perturbs = _gen_perturbs(rng, self.task_params.batch_size,
                             self.task_params.num_steps,
                             self.task_params.data_augment.lr_flip,
                             self.task_params.data_augment.delta_angle,
                             self.task_params.data_augment.delta_xy,
                             self.task_params.data_augment.structured)
    return instances, perturbs

  def worker(self, instances, perturbs):
    # Output the images and the free space.
    # Make the instances be all the same length.
    for i in range(len(instances)):
      for j in range(self.task_params.num_steps - len(instances[i]) + 1):
        instances[i].append(instances[i][-1])
      if perturbs[i].shape[0] < self.task_params.num_steps + 1:
        p = np.zeros((self.task_params.num_steps + 1, 4))
        p[:perturbs[i].shape[0], :] = perturbs[i]
        p[perturbs[i].shape[0]:, :] = perturbs[i][-1, :]
        perturbs[i] = p

    instances_ = []
    for instance in instances:
      instances_ = instances_ + instance
    perturbs_ = np.concatenate(perturbs, axis=0)

    instances_nodes = self.task.nodes[instances_, :]
    instances_nodes = [tuple(x) for x in instances_nodes]

    imgs_ = self.render_nodes(instances_nodes, perturbs_)
    imgs = []
    next = 0
    for instance in instances:
      img_i = []
      for _ in instance:
        img_i.append(imgs_[next])
        next = next + 1
      imgs.append(img_i)
    imgs = np.array(imgs)

    # Render out the maps in the egocentric view for all nodes and not just
    # the last node.
    all_nodes = []
    for x in instances:
      all_nodes = all_nodes + x
    all_perturbs = np.concatenate(perturbs, axis=0)
    loc, x_axis, y_axis, theta = self.get_loc_axis(
        self.task.nodes[all_nodes, :] * 1,
        delta_theta=self.task.delta_theta, perturb=all_perturbs)

    fss = None
    valids = None
    loc_on_map = None
    theta_on_map = None
    cum_fs = None
    cum_valid = None
    incremental_locs = None
    incremental_thetas = None

    if self.task_params.output_free_space:
      fss, valids = get_map_to_predict(loc, x_axis, y_axis,
                                       map=self.traversible * 1.,
                                       map_size=self.task_params.map_size)
      fss = np.array(fss) > 0.5
      fss = np.reshape(fss, [self.task_params.batch_size,
                             self.task_params.num_steps + 1,
                             self.task_params.map_size,
                             self.task_params.map_size])
      valids = np.reshape(np.array(valids), fss.shape)

    if self.task_params.output_transform_to_global_map:
      # Output the transform to the global map.
      loc_on_map = np.reshape(loc * 1, [self.task_params.batch_size,
                                        self.task_params.num_steps + 1, -1])
      # Converting to location wrt to first location so that warping happens
      # properly.
      theta_on_map = np.reshape(theta * 1, [self.task_params.batch_size,
                                            self.task_params.num_steps + 1,
                                            -1])

    if self.task_params.output_incremental_transform:
      # Output the transform to the global map.
      incremental_locs_ = np.reshape(loc * 1,
                                     [self.task_params.batch_size,
                                      self.task_params.num_steps + 1, -1])
      incremental_locs_[:, 1:, :] -= incremental_locs_[:, :-1, :]
      t0 = -np.pi / 2 + np.reshape(theta * 1,
                                   [self.task_params.batch_size,
                                    self.task_params.num_steps + 1, -1])
      t = t0 * 1
      incremental_locs = incremental_locs_ * 1
      incremental_locs[:, :, 0] = np.sum(
          incremental_locs_ * np.concatenate((np.cos(t), np.sin(t)),
                                             axis=-1), axis=-1)
      incremental_locs[:, :, 1] = np.sum(
          incremental_locs_ * np.concatenate((np.cos(t + np.pi / 2),
                                              np.sin(t + np.pi / 2)),
                                             axis=-1), axis=-1)
      incremental_locs[:, 0, :] = incremental_locs_[:, 0, :]
      # print incremental_locs_[0,:,:], incremental_locs[0,:,:], t0[0,:,:]

      incremental_thetas = np.reshape(theta * 1,
                                      [self.task_params.batch_size,
                                       self.task_params.num_steps + 1, -1])
      incremental_thetas[:, 1:, :] += -incremental_thetas[:, :-1, :]

    if self.task_params.output_canonical_map:
      loc_ = loc[0::(self.task_params.num_steps + 1), :]
      x_axis = np.zeros_like(loc_)
      x_axis[:, 1] = 1
      y_axis = np.zeros_like(loc_)
      y_axis[:, 0] = -1
      cum_fs, cum_valid = get_map_to_predict(
          loc_, x_axis, y_axis, map=self.traversible * 1.,
          map_size=self.task_params.map_size)
      cum_fs = np.array(cum_fs) > 0.5
      cum_fs = np.reshape(cum_fs, [self.task_params.batch_size, 1,
                                   self.task_params.map_size,
                                   self.task_params.map_size])
      cum_valid = np.reshape(np.array(cum_valid), cum_fs.shape)

    inputs = {'fs_maps': fss,
              'valid_maps': valids,
              'imgs': imgs,
              'loc_on_map': loc_on_map,
              'theta_on_map': theta_on_map,
              'cum_fs_maps': cum_fs,
              'cum_valid_maps': cum_valid,
              'incremental_thetas': incremental_thetas,
              'incremental_locs': incremental_locs}
    return inputs

  def pre(self, inputs):
    inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities)
    if inputs['loc_on_map'] is not None:
      inputs['loc_on_map'] = \
          inputs['loc_on_map'] - inputs['loc_on_map'][:, [0], :]
    if inputs['theta_on_map'] is not None:
      inputs['theta_on_map'] = np.pi / 2. - inputs['theta_on_map']
    return inputs

def _nav_env_reset_helper(type, rng, nodes, batch_size, gtG, max_dist,
                          num_steps, num_goals, data_augment, **kwargs):
  """Generates and returns a new episode."""
  max_compute = max_dist + 4 * num_steps
  if type == 'general':
    start_node_ids, end_node_ids, dist, pred_map, paths = \
        rng_target_dist_field(batch_size, gtG, rng, max_dist, max_compute,
                              nodes=nodes, compute_path=False)
    target_class = None

  elif type == 'room_to_room_many':
    goal_node_ids = []
    dists = []
    node_room_ids = kwargs['node_room_ids']
    # Sample the first one.
    start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room(
        batch_size, gtG, rng, max_dist, max_compute,
        node_room_ids=node_room_ids, nodes=nodes)
    start_node_ids = start_node_ids_
    goal_node_ids.append(end_node_ids_)
    dists.append(dist_)
    for n in range(num_goals - 1):
      start_node_ids_, end_node_ids_, dist_, _, _ = rng_next_goal(
          goal_node_ids[n], batch_size, gtG, rng, max_dist, max_compute,
          node_room_ids=node_room_ids, nodes=nodes,
          dists_from_start_node=dists[n])
      goal_node_ids.append(end_node_ids_)
      dists.append(dist_)
    target_class = None

  elif type == 'rng_rejection_sampling_many':
    num_goals = num_goals
    goal_node_ids = []
    dists = []

    n_ori = kwargs['n_ori']
    step_size = kwargs['step_size']
    min_dist = kwargs['min_dist']
    sampling_distribution = kwargs['sampling_distribution']
    target_distribution = kwargs['target_distribution']
    rejection_sampling_M = kwargs['rejection_sampling_M']
    distribution_bins = kwargs['distribution_bins']

    for n in range(num_goals):
      if n == 0:
        input_nodes = None
      else:
        input_nodes = goal_node_ids[n - 1]
      start_node_ids_, end_node_ids_, dist_, _, _, _, _ = \
          rng_next_goal_rejection_sampling(
              input_nodes, batch_size, gtG, rng, max_dist, min_dist,
              max_compute, sampling_distribution, target_distribution,
              nodes, n_ori, step_size, distribution_bins,
              rejection_sampling_M)
      if n == 0:
        start_node_ids = start_node_ids_
      goal_node_ids.append(end_node_ids_)
      dists.append(dist_)
    target_class = None

  elif type == 'room_to_room_back':
    num_goals = num_goals
    assert(num_goals == 2), 'num_goals must be 2.'
    goal_node_ids = []
    dists = []
    node_room_ids = kwargs['node_room_ids']
    # Sample the first one.
    start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room(
        batch_size, gtG, rng, max_dist, max_compute,
        node_room_ids=node_room_ids, nodes=nodes)
    start_node_ids = start_node_ids_
    goal_node_ids.append(end_node_ids_)
    dists.append(dist_)

    # Set second goal to be starting position, and compute distance to the
    # start node.
    goal_node_ids.append(start_node_ids)
    dist = []
    for i in range(batch_size):
      dist_ = gt.topology.shortest_distance(
          gt.GraphView(gtG, reversed=True),
          source=gtG.vertex(start_node_ids[i]), target=None)
      dist_ = np.array(dist_.get_array())
      dist.append(dist_)
    dists.append(dist)
    target_class = None

  elif type[:14] == 'to_nearest_obj':
    # Generate an episode by sampling one of the target classes (with
    # probability proportional to the number of nodes in the world).
    # With the sampled class sample a node that is within some distance from
    # the sampled class.
    class_nodes = kwargs['class_nodes']
    sampling = kwargs['sampling']
    dist_to_class = kwargs['dist_to_class']

    assert(num_goals == 1), 'Only supports a single goal.'

    ind = rng.choice(class_nodes.shape[0], size=batch_size)
    target_class = class_nodes[ind, 1]

    start_node_ids = []
    dists = []
    goal_node_ids = []

    for t in target_class:
      if sampling == 'uniform':
        max_dist = max_dist
        cnts = np.bincount(dist_to_class[t], minlength=max_dist + 1) * 1.
        cnts[max_dist + 1:] = 0
        p_each = 1. / cnts / (max_dist + 1.)
        p_each[cnts == 0] = 0
        p = p_each[dist_to_class[t]] * 1.
        p = p / np.sum(p)
        start_node_id = rng.choice(p.shape[0], size=1, p=p)[0]
      else:
        logging.fatal('Sampling not one of uniform.')
      start_node_ids.append(start_node_id)
      dists.append(dist_to_class[t])
      # Dummy goal node, same as the start node, so that vis is better.
      goal_node_ids.append(start_node_id)
    dists = [dists]
    goal_node_ids = [goal_node_ids]

  return start_node_ids, goal_node_ids, dists, target_class

class NavigationEnv(GridWorld, Building):
  """Wrapper around GridWorld which sets up navigation tasks.
  """
  def _debug_save_hardness(self, seed):
    out_path = os.path.join(
        self.logdir, '{:s}_{:d}_hardness.png'.format(self.building_name,
                                                     seed))
    batch_size = 4000
    rng = np.random.RandomState(0)
    start_node_ids, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists = \
        rng_next_goal_rejection_sampling(
            None, batch_size, self.task.gtG, rng, self.task_params.max_dist,
            self.task_params.min_dist, self.task_params.max_dist,
            self.task.sampling_distribution, self.task.target_distribution,
            self.task.nodes, self.task_params.n_ori,
            self.task_params.step_size, self.task.distribution_bins,
            self.task.rejection_sampling_M)
    bins = self.task.distribution_bins
    n_bins = self.task.n_bins
    with plt.style.context('ggplot'):
      fig, axes = utils.subplot(plt, (1, 2), (10, 10))
      ax = axes[0]
      _ = ax.hist(hardnesss, bins=bins,
                  weights=np.ones_like(hardnesss) / len(hardnesss))
      ax.plot(bins[:-1] + 0.5 / n_bins, self.task.target_distribution, 'g')
      ax.plot(bins[:-1] + 0.5 / n_bins, self.task.sampling_distribution, 'b')
      ax.grid('on')

      ax = axes[1]
      _ = ax.hist(gt_dists, bins=np.arange(self.task_params.max_dist + 1))
      ax.grid('on')
      ax.set_title('Mean: {:0.2f}, Median: {:0.2f}'.format(
          np.mean(gt_dists), np.median(gt_dists)))
    with fu.fopen(out_path, 'w') as f:
      fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)

  def _debug_save_map_nodes(self, seed):
    """Saves traversible space along with nodes generated on the graph. Takes
    the seed as input."""
    img_path = os.path.join(
        self.logdir, '{:s}_{:d}_graph.png'.format(self.building_name, seed))
    node_xyt = self.to_actual_xyt_vec(self.task.nodes)
    plt.set_cmap('jet')
    fig, ax = utils.subplot(plt, (1, 1), (12, 12))
    ax.plot(node_xyt[:, 0], node_xyt[:, 1], 'm.')
    ax.set_axis_off()
    ax.axis('equal')

    if self.room_dims is not None:
      for i, r in enumerate(self.room_dims['dims'] * 1):
        min_ = r[:3] * 1
        max_ = r[3:] * 1
        xmin, ymin, zmin = min_
        xmax, ymax, zmax = max_
        ax.plot([xmin, xmax, xmax, xmin, xmin],
                [ymin, ymin, ymax, ymax, ymin], 'g')
    ax.imshow(self.traversible, origin='lower')
    with fu.fopen(img_path, 'w') as f:
      fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)

  def _debug_semantic_maps(self, seed):
    """Saves traversible space overlaid with the dilated semantic class maps.
    Takes the seed as input."""
    for i, cls in enumerate(self.task_params.semantic_task.class_map_names):
      img_path = os.path.join(
          self.logdir, '{:s}_flip{:d}_{:s}_graph.png'.format(
              self.building_name, seed, cls))
      maps = self.traversible * 1.
      maps += 0.5 * (self.task.class_maps_dilated[:, :, i])
      write_traversible = (maps * 1. + 1.) / 3.0
      write_traversible = \
          (write_traversible * 255.).astype(np.uint8)[:, :, np.newaxis]
      write_traversible = \
          write_traversible + np.zeros((1, 1, 3), dtype=np.uint8)
      fu.write_image(img_path, write_traversible[::-1, :, :])

  def _preprocess_for_task(self, seed):
    """Sets up the task field for doing navigation on the grid world."""
    if self.task is None or self.task.seed != seed:
      rng = np.random.RandomState(seed)
      origin_loc = get_graph_origin_loc(rng, self.traversible)
      self.task = utils.Foo(seed=seed, origin_loc=origin_loc,
                            n_ori=self.task_params.n_ori)
      G = generate_graph(self.valid_fn_vec, self.task_params.step_size,
                         self.task.n_ori, (0, 0, 0))
      gtG, nodes, nodes_to_id = convert_to_graph_tool(G)
      self.task.gtG = gtG
      self.task.nodes = nodes
      self.task.delta_theta = 2.0 * np.pi / (self.task.n_ori * 1.)
      self.task.nodes_to_id = nodes_to_id

      logging.info('Building %s, #V=%d, #E=%d', self.building_name,
                   self.task.nodes.shape[0], self.task.gtG.num_edges())
      type = self.task_params.type

      if type == 'general':
        # Do nothing.
        _ = None

      elif type == 'room_to_room_many' or type == 'room_to_room_back':
        if type == 'room_to_room_back':
          assert(self.task_params.num_goals == 2), 'num_goals must be 2.'
        self.room_dims = _filter_rooms(self.room_dims,
                                       self.task_params.room_regex)
        xyt = self.to_actual_xyt_vec(self.task.nodes)
        self.task.node_room_ids = _label_nodes_with_room_id(xyt,
                                                            self.room_dims)
        self.task.reset_kwargs = {'node_room_ids': self.task.node_room_ids}

      elif type == 'rng_rejection_sampling_many':
        n_bins = 20
        rejection_sampling_M = self.task_params.rejection_sampling_M
        min_dist = self.task_params.min_dist
        bins = np.arange(n_bins + 1) / (n_bins * 1.)
        target_d = np.zeros(n_bins)
        target_d[...] = 1. / n_bins
        sampling_d = get_hardness_distribution(
            self.task.gtG, self.task_params.max_dist,
            self.task_params.min_dist, np.random.RandomState(0), 4000, bins,
            self.task.nodes, self.task_params.n_ori,
            self.task_params.step_size)
        self.task.reset_kwargs = {'distribution_bins': bins,
                                  'target_distribution': target_d,
                                  'sampling_distribution': sampling_d,
                                  'rejection_sampling_M': rejection_sampling_M,
                                  'n_bins': n_bins,
                                  'n_ori': self.task_params.n_ori,
                                  'step_size': self.task_params.step_size,
                                  'min_dist': self.task_params.min_dist}
        self.task.n_bins = n_bins
        self.task.distribution_bins = bins
        self.task.target_distribution = target_d
        self.task.sampling_distribution = sampling_d
        self.task.rejection_sampling_M = rejection_sampling_M
        if self.logdir is not None:
          self._debug_save_hardness(seed)

      elif type[:14] == 'to_nearest_obj':
        self.room_dims = _filter_rooms(self.room_dims,
                                       self.task_params.room_regex)
        xyt = self.to_actual_xyt_vec(self.task.nodes)
        self.class_maps = _select_classes(
            self.class_maps, self.class_map_names,
            self.task_params.semantic_task.class_map_names) * 1
        self.class_map_names = self.task_params.semantic_task.class_map_names
        nodes_xyt = self.to_actual_xyt_vec(np.array(self.task.nodes))

        tt = utils.Timer()
        tt.tic()
        if self.task_params.type == 'to_nearest_obj_acc':
          self.task.class_maps_dilated, self.task.node_class_label = \
              label_nodes_with_class_geodesic(
                  nodes_xyt, self.class_maps,
                  self.task_params.semantic_task.pix_distance + 8,
                  self.map.traversible, ff_cost=1., fo_cost=1., oo_cost=4.,
                  connectivity=8.)

        dists = []
        for i in range(len(self.class_map_names)):
          class_nodes_ = np.where(self.task.node_class_label[:, i])[0]
          dists.append(get_distance_node_list(gtG, source_nodes=class_nodes_,
                                              direction='to'))
        self.task.dist_to_class = dists
        a_, b_ = np.where(self.task.node_class_label)
        self.task.class_nodes = np.concatenate(
            (a_[:, np.newaxis], b_[:, np.newaxis]), axis=1)

        if self.logdir is not None:
          self._debug_semantic_maps(seed)

        self.task.reset_kwargs = {
            'sampling': self.task_params.semantic_task.sampling,
            'class_nodes': self.task.class_nodes,
            'dist_to_class': self.task.dist_to_class}

      if self.logdir is not None:
        self._debug_save_map_nodes(seed)
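The 'rng_rejection_sampling_many' branch hands the episode sampler a flat target distribution over 20 hardness bins and an empirical proposal distribution. A minimal sketch of classic rejection sampling with these ingredients (sample_fn and hardness_fn are hypothetical stand-ins for the repo's actual episode sampler):

import numpy as np

def rejection_sample(rng, sample_fn, hardness_fn, bins, target_d, sampling_d, M):
  # Accept a candidate with probability target/(M * proposal) for its hardness
  # bin; accepted samples then follow the flat target distribution.
  while True:
    cand = sample_fn(rng)
    b = np.clip(np.digitize(hardness_fn(cand), bins) - 1, 0, len(target_d) - 1)
    if rng.rand() < target_d[b] / (M * sampling_d[b]):
      return cand

rng = np.random.RandomState(0)
bins = np.arange(21) / 20.
target_d = np.ones(20) / 20.     # flat target, as in target_d above
sampling_d = np.ones(20) / 20.   # in the real code: get_hardness_distribution
ep = rejection_sample(rng, lambda r: r.rand(), lambda c: c, bins,
                      target_d, sampling_d, M=100)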
  def reset(self, rngs):
    rng = rngs[0]
    rng_perturb = rngs[1]
    nodes = self.task.nodes
    tp = self.task_params
    start_node_ids, goal_node_ids, dists, target_class = \
        _nav_env_reset_helper(tp.type, rng, self.task.nodes, tp.batch_size,
                              self.task.gtG, tp.max_dist, tp.num_steps,
                              tp.num_goals, tp.data_augment,
                              **(self.task.reset_kwargs))
    start_nodes = [tuple(nodes[_, :]) for _ in start_node_ids]
    goal_nodes = [[tuple(nodes[_, :]) for _ in __] for __ in goal_node_ids]

    data_augment = tp.data_augment
    perturbs = _gen_perturbs(rng_perturb, tp.batch_size,
                             (tp.num_steps + 1) * tp.num_goals,
                             data_augment.lr_flip, data_augment.delta_angle,
                             data_augment.delta_xy, data_augment.structured)
    perturbs = np.array(perturbs)  # batch x steps x 4
    end_perturbs = perturbs[:, -(tp.num_goals):, :] * 1  # fixed perturb for the goal.
    perturbs = perturbs[:, :-(tp.num_goals), :] * 1

    history = -np.ones((tp.batch_size, tp.num_steps * tp.num_goals),
                       dtype=np.int32)
    self.episode = utils.Foo(
        start_nodes=start_nodes, start_node_ids=start_node_ids,
        goal_nodes=goal_nodes, goal_node_ids=goal_node_ids,
        dist_to_goal=dists, perturbs=perturbs, goal_perturbs=end_perturbs,
        history=history, target_class=target_class, history_frames=[])
    return start_node_ids
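The perturbation tensor generated above carries (num_steps + 1) * num_goals rows per episode; the trailing num_goals rows are reserved for rendering the goals and the rest perturb individual steps. A shape-only sketch of that split, using the base-config values as assumptions:

import numpy as np

batch_size, num_steps, num_goals = 4, 40, 1
perturbs = np.zeros((batch_size, (num_steps + 1) * num_goals, 4))
goal_perturbs = perturbs[:, -num_goals:, :] * 1   # fixed perturb per goal
step_perturbs = perturbs[:, :-num_goals, :] * 1   # one perturb per step
print(step_perturbs.shape, goal_perturbs.shape)   # (4, 40, 4) (4, 1, 4)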
  def take_action(self, current_node_ids, action, step_number):
    """Takes the action, and returns the new node ids along with the reward
    that the agent receives."""
    # Integer division so goal_number stays a valid index under Python 3.
    goal_number = step_number // self.task_params.num_steps
    new_node_ids = GridWorld.take_action(self, current_node_ids, action)
    rewards = []
    for i, n in enumerate(new_node_ids):
      reward = 0
      if n == self.episode.goal_node_ids[goal_number][i]:
        reward = self.task_params.reward_at_goal
      reward = reward - self.task_params.reward_time_penalty
      rewards.append(reward)
    return new_node_ids, rewards
  def get_optimal_action(self, current_node_ids, step_number):
    """Returns the optimal action from the current node."""
    goal_number = step_number // self.task_params.num_steps
    gtG = self.task.gtG
    a = np.zeros((len(current_node_ids), self.task_params.num_actions),
                 dtype=np.int32)
    d_dict = self.episode.dist_to_goal[goal_number]
    for i, c in enumerate(current_node_ids):
      neigh = gtG.vertex(c).out_neighbours()
      neigh_edge = gtG.vertex(c).out_edges()
      ds = np.array([d_dict[i][int(x)] for x in neigh])
      ds_min = np.min(ds)
      for i_, e in enumerate(neigh_edge):
        if ds[i_] == ds_min:
          _ = gtG.ep['action'][e]
          a[i, _] = 1
    return a
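Taken together, reset(), get_optimal_action() and take_action() are enough to roll out the shortest-path expert. A hypothetical driver loop, assuming GridWorld.take_action accepts the argmax index of the one-hot output (ties between equally good actions resolve to the first):

import numpy as np

def expert_rollout(env, num_steps, seed=0):
  rngs = [np.random.RandomState(seed), np.random.RandomState(seed + 1)]
  node_ids = env.reset(rngs)
  total_reward = np.zeros(len(node_ids))
  for step in range(num_steps):
    a = env.get_optimal_action(node_ids, step)   # B x num_actions, one-hot
    node_ids, rewards = env.take_action(node_ids, np.argmax(a, axis=1), step)
    total_reward += np.array(rewards)
  return total_reward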
  def get_targets(self, current_node_ids, step_number):
    """Returns the target actions from the current node."""
    action = self.get_optimal_action(current_node_ids, step_number)
    action = np.expand_dims(action, axis=1)
    return vars(utils.Foo(action=action))
  def get_targets_name(self):
    """Returns the list of names of the targets."""
    return ['action']

  def cleanup(self):
    self.episode = None
class VisualNavigationEnv(NavigationEnv):
  """Class for doing visual navigation in environments. Functions for
  computing features on states, etc.
  """
  def __init__(self, robot, env, task_params, category_list=None,
               building_name=None, flip=False, logdir=None,
               building_loader=None, r_obj=None):
    tt = utils.Timer()
    tt.tic()
    Building.__init__(self, building_name, robot, env, category_list,
                      small=task_params.toy_problem, flip=flip, logdir=logdir,
                      building_loader=building_loader)
    self.set_r_obj(r_obj)
    self.task_params = task_params
    self.task = None
    self.episode = None
    self._preprocess_for_task(self.task_params.building_seed)
    if hasattr(self.task_params, 'map_scales'):
      self.task.scaled_maps = resize_maps(
          self.traversible.astype(np.float32) * 1,
          self.task_params.map_scales, self.task_params.map_resize_method)
    else:
      logging.fatal('VisualNavigationEnv does not support scale_f anymore.')
    self.task.readout_maps_scaled = resize_maps(
        self.traversible.astype(np.float32) * 1,
        self.task_params.readout_maps_scales,
        self.task_params.map_resize_method)
    tt.toc(log_at=1, log_str='VisualNavigationEnv __init__: ')
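resize_maps produces one float map per entry in map_scales, which the mapper consumes as a multi-scale pyramid. A rough stand-in using scipy (assumed available; 'linear_noantialiasing' is treated here as plain bilinear interpolation with no prefilter):

import numpy as np
from scipy.ndimage import zoom

def resize_maps_sketch(traversible, scales):
  # One resized float map per scale; order=1 is bilinear interpolation.
  return [zoom(traversible.astype(np.float32), s, order=1) for s in scales]

maps = resize_maps_sketch(np.ones((200, 300), dtype=bool), [1., 0.5, 0.25])
print([m.shape for m in maps])  # [(200, 300), (100, 150), (50, 75)]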
  def get_weight(self):
    return self.task.nodes.shape[0]
  def get_common_data(self):
    goal_nodes = self.episode.goal_nodes
    start_nodes = self.episode.start_nodes
    perturbs = self.episode.perturbs
    goal_perturbs = self.episode.goal_perturbs
    target_class = self.episode.target_class

    goal_locs = []
    rel_goal_locs = []
    for i in range(len(goal_nodes)):
      end_nodes = goal_nodes[i]
      goal_loc, _, _, goal_theta = self.get_loc_axis(
          np.array(end_nodes), delta_theta=self.task.delta_theta,
          perturb=goal_perturbs[:, i, :])

      # Compute the relative location to all goals from the starting location.
      loc, _, _, theta = self.get_loc_axis(np.array(start_nodes),
                                           delta_theta=self.task.delta_theta,
                                           perturb=perturbs[:, 0, :])
      r_goal, t_goal = _get_relative_goal_loc(goal_loc * 1., loc, theta)
      rel_goal_loc = np.concatenate((r_goal * np.cos(t_goal),
                                     r_goal * np.sin(t_goal),
                                     np.cos(goal_theta - theta),
                                     np.sin(goal_theta - theta)), axis=1)
      rel_goal_locs.append(np.expand_dims(rel_goal_loc, axis=1))
      goal_locs.append(np.expand_dims(goal_loc, axis=1))

    map = self.traversible * 1.
    maps = np.repeat(np.expand_dims(np.expand_dims(map, axis=0), axis=0),
                     self.task_params.batch_size, axis=0) * 1
    if self.task_params.type[:14] == 'to_nearest_obj':
      for i in range(self.task_params.batch_size):
        maps[i, 0, :, :] += 0.5 * (
            self.task.class_maps_dilated[:, :, target_class[i]])

    rel_goal_locs = np.concatenate(rel_goal_locs, axis=1)
    goal_locs = np.concatenate(goal_locs, axis=1)
    maps = np.expand_dims(maps, axis=-1)

    if self.task_params.type[:14] == 'to_nearest_obj':
      rel_goal_locs = np.zeros(
          (self.task_params.batch_size, 1,
           len(self.task_params.semantic_task.class_map_names)),
          dtype=np.float32)
      goal_locs = np.zeros((self.task_params.batch_size, 1, 2),
                           dtype=np.float32)
      for i in range(self.task_params.batch_size):
        t = target_class[i]
        rel_goal_locs[i, 0, t] = 1.
        goal_locs[i, 0, 0] = t
        goal_locs[i, 0, 1] = np.NaN

    return vars(utils.Foo(orig_maps=maps, goal_loc=goal_locs,
                          rel_goal_loc_at_start=rel_goal_locs))
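The 4-vector built from _get_relative_goal_loc encodes the goal as (r cos t, r sin t, cos dtheta, sin dtheta) in the robot's frame. A small sketch of the underlying geometry, assuming _get_relative_goal_loc returns range and egocentric bearing:

import numpy as np

def relative_goal(goal_loc, loc, theta):
  # Range r and bearing t of the goal in the frame of a robot at `loc`
  # with heading `theta` (all arrays are batched along axis 0).
  d = goal_loc - loc
  r = np.linalg.norm(d, axis=1, keepdims=True)
  t = np.arctan2(d[:, 1:2], d[:, 0:1]) - theta
  return r, t

r, t = relative_goal(np.array([[10., 0.]]), np.array([[0., 0.]]),
                     np.array([[np.pi / 2]]))
print(r, t)  # r = 10, t = -pi/2: the goal is 10 units to the robot's right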
  def pre_common_data(self, inputs):
    return inputs
  def get_features(self, current_node_ids, step_number):
    task_params = self.task_params
    goal_number = step_number // self.task_params.num_steps
    end_nodes = self.task.nodes[
        self.episode.goal_node_ids[goal_number], :] * 1
    current_nodes = self.task.nodes[current_node_ids, :] * 1
    end_perturbs = self.episode.goal_perturbs[:, goal_number, :][:,
                                                                 np.newaxis, :]
    perturbs = self.episode.perturbs
    target_class = self.episode.target_class

    # Append to history.
    self.episode.history[:, step_number] = np.array(current_node_ids)

    # Render out the images from current node.
    outs = {}

    if self.task_params.outputs.images:
      imgs_all = []
      imgs = self.render_nodes([tuple(x) for x in current_nodes],
                               perturb=perturbs[:, step_number, :])
      imgs_all.append(imgs)
      aux_delta_thetas = self.task_params.aux_delta_thetas
      for i in range(len(aux_delta_thetas)):
        imgs = self.render_nodes([tuple(x) for x in current_nodes],
                                 perturb=perturbs[:, step_number, :],
                                 aux_delta_theta=aux_delta_thetas[i])
        imgs_all.append(imgs)
      imgs_all = np.array(imgs_all)  # A x B x H x W x C
      imgs_all = np.transpose(imgs_all, axes=[1, 0, 2, 3, 4])
      imgs_all = np.expand_dims(imgs_all, axis=1)  # B x N x A x H x W x C

      if task_params.num_history_frames > 0:
        if step_number == 0:
          # Seed the history by repeating the same frame
          # num_history_frames + 1 times.
          for i in range(task_params.num_history_frames + 1):
            self.episode.history_frames.insert(0, imgs_all * 1.)
        self.episode.history_frames.insert(0, imgs_all)
        self.episode.history_frames.pop()
        imgs_all_with_history = np.concatenate(self.episode.history_frames,
                                               axis=2)
      else:
        imgs_all_with_history = imgs_all
      outs['imgs'] = imgs_all_with_history  # B x N x A x H x W x C

    if self.task_params.outputs.node_ids:
      outs['node_ids'] = np.array(current_node_ids).reshape((-1, 1, 1))
      outs['perturbs'] = np.expand_dims(perturbs[:, step_number, :] * 1.,
                                        axis=1)

    if self.task_params.outputs.analytical_counts:
      assert(self.task_params.modalities == ['depth'])
      d = image_pre(outs['imgs'] * 1., self.task_params.modalities)
      cm = get_camera_matrix(self.task_params.img_width,
                             self.task_params.img_height,
                             self.task_params.img_fov)
      XYZ = get_point_cloud_from_z(100. / d[..., 0], cm)
      XYZ = make_geocentric(XYZ * 100., self.robot.sensor_height,
                            self.robot.camera_elevation_degree)
      for i in range(len(self.task_params.analytical_counts.map_sizes)):
        non_linearity = self.task_params.analytical_counts.non_linearity[i]
        count, isvalid = bin_points(
            XYZ * 1.,
            map_size=self.task_params.analytical_counts.map_sizes[i],
            xy_resolution=self.task_params.analytical_counts.xy_resolution[i],
            z_bins=self.task_params.analytical_counts.z_bins[i])
        assert(count.shape[2] == 1), 'only works for n_views equal to 1.'
        count = count[:, :, 0, :, :, :]
        isvalid = isvalid[:, :, 0, :, :, :]
        if non_linearity == 'none':
          pass
        elif non_linearity == 'min10':
          count = np.minimum(count, 10.)
        elif non_linearity == 'sqrt':
          count = np.sqrt(count)
        else:
          logging.fatal('Undefined non_linearity.')
        outs['analytical_counts_{:d}'.format(i)] = count

    # Compute the goal location in the coordinate frame of the robot.
    if self.task_params.outputs.rel_goal_loc:
      if self.task_params.type[:14] != 'to_nearest_obj':
        loc, _, _, theta = self.get_loc_axis(
            current_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number, :])
        goal_loc, _, _, goal_theta = self.get_loc_axis(
            end_nodes, delta_theta=self.task.delta_theta,
            perturb=end_perturbs[:, 0, :])
        r_goal, t_goal = _get_relative_goal_loc(goal_loc, loc, theta)
        rel_goal_loc = np.concatenate((r_goal * np.cos(t_goal),
                                       r_goal * np.sin(t_goal),
                                       np.cos(goal_theta - theta),
                                       np.sin(goal_theta - theta)), axis=1)
        outs['rel_goal_loc'] = np.expand_dims(rel_goal_loc, axis=1)
      elif self.task_params.type[:14] == 'to_nearest_obj':
        rel_goal_loc = np.zeros(
            (self.task_params.batch_size, 1,
             len(self.task_params.semantic_task.class_map_names)),
            dtype=np.float32)
        for i in range(self.task_params.batch_size):
          t = target_class[i]
          rel_goal_loc[i, 0, t] = 1.
        outs['rel_goal_loc'] = rel_goal_loc

    # Location on map to plot the trajectory during validation.
    if self.task_params.outputs.loc_on_map:
      loc, x_axis, y_axis, theta = self.get_loc_axis(
          current_nodes, delta_theta=self.task.delta_theta,
          perturb=perturbs[:, step_number, :])
      outs['loc_on_map'] = np.expand_dims(loc, axis=1)

    # Compute gt_dist to goal.
    if self.task_params.outputs.gt_dist_to_goal:
      gt_dist_to_goal = np.zeros((len(current_node_ids), 1), dtype=np.float32)
      for i, n in enumerate(current_node_ids):
        gt_dist_to_goal[i, 0] = self.episode.dist_to_goal[goal_number][i][n]
      outs['gt_dist_to_goal'] = np.expand_dims(gt_dist_to_goal, axis=1)

    # Free space in front of you, map and goal as images.
    if self.task_params.outputs.ego_maps:
      loc, x_axis, y_axis, theta = self.get_loc_axis(
          current_nodes, delta_theta=self.task.delta_theta,
          perturb=perturbs[:, step_number, :])
      maps = generate_egocentric_maps(self.task.scaled_maps,
                                      self.task_params.map_scales,
                                      self.task_params.map_crop_sizes,
                                      loc, x_axis, y_axis, theta)
      for i in range(len(self.task_params.map_scales)):
        outs['ego_maps_{:d}'.format(i)] = \
            np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1)

    if self.task_params.outputs.readout_maps:
      loc, x_axis, y_axis, theta = self.get_loc_axis(
          current_nodes, delta_theta=self.task.delta_theta,
          perturb=perturbs[:, step_number, :])
      maps = generate_egocentric_maps(
          self.task.readout_maps_scaled,
          self.task_params.readout_maps_scales,
          self.task_params.readout_maps_crop_sizes,
          loc, x_axis, y_axis, theta)
      for i in range(len(self.task_params.readout_maps_scales)):
        outs['readout_maps_{:d}'.format(i)] = \
            np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1)

    # Images for the goal.
    if self.task_params.outputs.ego_goal_imgs:
      if self.task_params.type[:14] != 'to_nearest_obj':
        loc, x_axis, y_axis, theta = self.get_loc_axis(
            current_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number, :])
        goal_loc, _, _, _ = self.get_loc_axis(
            end_nodes, delta_theta=self.task.delta_theta,
            perturb=end_perturbs[:, 0, :])
        rel_goal_orientation = np.mod(
            np.int32(current_nodes[:, 2:] - end_nodes[:, 2:]),
            self.task_params.n_ori)
        goal_dist, goal_theta = _get_relative_goal_loc(goal_loc, loc, theta)
        goals = generate_goal_images(self.task_params.map_scales,
                                     self.task_params.map_crop_sizes,
                                     self.task_params.n_ori, goal_dist,
                                     goal_theta, rel_goal_orientation)
        for i in range(len(self.task_params.map_scales)):
          outs['ego_goal_imgs_{:d}'.format(i)] = np.expand_dims(goals[i],
                                                                axis=1)
      elif self.task_params.type[:14] == 'to_nearest_obj':
        for i in range(len(self.task_params.map_scales)):
          num_classes = len(self.task_params.semantic_task.class_map_names)
          outs['ego_goal_imgs_{:d}'.format(i)] = np.zeros(
              (self.task_params.batch_size, 1,
               self.task_params.map_crop_sizes[i],
               self.task_params.map_crop_sizes[i],
               self.task_params.goal_channels))
        for i in range(self.task_params.batch_size):
          t = target_class[i]
          for j in range(len(self.task_params.map_scales)):
            outs['ego_goal_imgs_{:d}'.format(j)][i, :, :, :, t] = 1.

    # Incremental locs and theta (for map warping), always in the original
    # scale of the map; the subsequent steps in the tf code scale
    # appropriately. Scaling is done by just multiplying incremental_locs
    # appropriately.
    if self.task_params.outputs.egomotion:
      if step_number == 0:
        # Zero ego motion.
        incremental_locs = np.zeros((self.task_params.batch_size, 1, 2),
                                    dtype=np.float32)
        incremental_thetas = np.zeros((self.task_params.batch_size, 1, 1),
                                      dtype=np.float32)
      else:
        previous_nodes = self.task.nodes[
            self.episode.history[:, step_number - 1], :] * 1
        loc, _, _, theta = self.get_loc_axis(
            current_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number, :])
        previous_loc, _, _, previous_theta = self.get_loc_axis(
            previous_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number - 1, :])
        incremental_locs_ = np.reshape(loc - previous_loc,
                                       [self.task_params.batch_size, 1, -1])
        t = -np.pi / 2 + np.reshape(theta * 1,
                                    [self.task_params.batch_size, 1, -1])
        incremental_locs = incremental_locs_ * 1
        incremental_locs[:, :, 0] = np.sum(
            incremental_locs_ * np.concatenate((np.cos(t), np.sin(t)),
                                               axis=-1), axis=-1)
        incremental_locs[:, :, 1] = np.sum(
            incremental_locs_ * np.concatenate((np.cos(t + np.pi / 2),
                                                np.sin(t + np.pi / 2)),
                                               axis=-1), axis=-1)
        incremental_thetas = np.reshape(theta - previous_theta,
                                        [self.task_params.batch_size, 1, -1])
      outs['incremental_locs'] = incremental_locs
      outs['incremental_thetas'] = incremental_thetas

    if self.task_params.outputs.visit_count:
      # Output the visit count for this state: how many times the current
      # state has been visited, and how far back in the history the last
      # visit was (excluding this one).
      visit_count = np.zeros((self.task_params.batch_size, 1), dtype=np.int32)
      last_visit = -np.ones((self.task_params.batch_size, 1), dtype=np.int32)
      if step_number >= 1:
        h = self.episode.history[:, :(step_number)]
        visit_count[:, 0] = np.sum(
            h == np.array(current_node_ids).reshape([-1, 1]), axis=1)
        last_visit[:, 0] = np.argmax(
            h[:, ::-1] == np.array(current_node_ids).reshape([-1, 1]),
            axis=1) + 1
        last_visit[visit_count == 0] = -1  # -1 if not visited.
      outs['visit_count'] = np.expand_dims(visit_count, axis=1)
      outs['last_visit'] = np.expand_dims(last_visit, axis=1)
    return outs
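The visit-count bookkeeping above reduces to two vectorized reductions over the episode history. A worked example on a single-element batch (values are illustrative):

import numpy as np

history = np.array([[3, 5, 3, 7]])                 # B x T node ids seen so far
current = np.array([3]).reshape([-1, 1])
visit_count = np.sum(history == current, axis=1)                 # [2] visits
last_visit = np.argmax(history[:, ::-1] == current, axis=1) + 1  # [2] steps ago
last_visit[visit_count == 0] = -1                  # -1 if never visited
print(visit_count, last_visit)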
  def get_features_name(self):
    f = []
    if self.task_params.outputs.images:
      f.append('imgs')
    if self.task_params.outputs.rel_goal_loc:
      f.append('rel_goal_loc')
    if self.task_params.outputs.loc_on_map:
      f.append('loc_on_map')
    if self.task_params.outputs.gt_dist_to_goal:
      f.append('gt_dist_to_goal')
    if self.task_params.outputs.ego_maps:
      for i in range(len(self.task_params.map_scales)):
        f.append('ego_maps_{:d}'.format(i))
    if self.task_params.outputs.readout_maps:
      for i in range(len(self.task_params.readout_maps_scales)):
        f.append('readout_maps_{:d}'.format(i))
    if self.task_params.outputs.ego_goal_imgs:
      for i in range(len(self.task_params.map_scales)):
        f.append('ego_goal_imgs_{:d}'.format(i))
    if self.task_params.outputs.egomotion:
      f.append('incremental_locs')
      f.append('incremental_thetas')
    if self.task_params.outputs.visit_count:
      f.append('visit_count')
      f.append('last_visit')
    if self.task_params.outputs.analytical_counts:
      for i in range(len(self.task_params.analytical_counts.map_sizes)):
        f.append('analytical_counts_{:d}'.format(i))
    if self.task_params.outputs.node_ids:
      f.append('node_ids')
      f.append('perturbs')
    return f
  def pre_features(self, inputs):
    if self.task_params.outputs.images:
      inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities)
    return inputs
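pre_features defers to image_pre for modality-dependent preprocessing. Its exact normalization lives elsewhere in the repo; a hypothetical stand-in that zero-centers rgb renders might look like this:

import numpy as np

def image_pre_sketch(imgs, modalities):
  # Hypothetical: scale uint8 rgb renders to roughly zero-centered floats.
  imgs = imgs.astype(np.float32)
  if 'rgb' in modalities:
    imgs = imgs / 255. - 0.5
  return imgs

x = image_pre_sketch(np.full((1, 1, 1, 8, 8, 3), 128, dtype=np.uint8), ['rgb'])
print(x.min(), x.max())  # ~0.002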
class BuildingMultiplexer():
  def __init__(self, args, task_number):
    params = vars(args)
    for k in params.keys():
      setattr(self, k, params[k])
    self.task_number = task_number
    self._pick_data(task_number)
    logging.info('Env Class: %s.', self.env_class)
    if self.task_params.task == 'planning':
      self._setup_planner()
    elif self.task_params.task == 'mapping':
      self._setup_mapper()
    elif self.task_params.task == 'map+plan':
      self._setup_mapper()
    else:
      # %s-style args, not str.format, so the task name actually gets logged.
      logging.error('Undefined task: %s', self.task_params.task)

  def _pick_data(self, task_number):
    logging.error('Input Building Names: %s', self.building_names)
    # Integer division so the flip flag is computed from a whole number of
    # passes over the building list.
    self.flip = [np.mod(task_number // len(self.building_names), 2) == 1]
    id = np.mod(task_number, len(self.building_names))
    self.building_names = [self.building_names[id]]
    self.task_params.building_seed = task_number
    logging.error('BuildingMultiplexer: Picked Building Name: %s',
                  self.building_names)
    self.building_names = self.building_names[0].split('+')
    self.flip = [self.flip[0] for _ in self.building_names]
    logging.error('BuildingMultiplexer: Picked Building Name: %s',
                  self.building_names)
    logging.error('BuildingMultiplexer: Flipping Buildings: %s', self.flip)
    logging.error('BuildingMultiplexer: Set building_seed: %d',
                  self.task_params.building_seed)
    self.num_buildings = len(self.building_names)
    logging.error('BuildingMultiplexer: Num buildings: %d',
                  self.num_buildings)

  def _setup_planner(self):
    # Load building env class.
    self.buildings = []
    for i, building_name in enumerate(self.building_names):
      b = self.env_class(robot=self.robot, env=self.env,
                         task_params=self.task_params,
                         building_name=building_name, flip=self.flip[i],
                         logdir=self.logdir, building_loader=self.dataset)
      self.buildings.append(b)

  def _setup_mapper(self):
    # Set up the renderer.
    cp = self.camera_param
    rgb_shader, d_shader = sru.get_shaders(cp.modalities)
    r_obj = SwiftshaderRenderer()
    r_obj.init_display(width=cp.width, height=cp.height, fov=cp.fov,
                       z_near=cp.z_near, z_far=cp.z_far,
                       rgb_shader=rgb_shader, d_shader=d_shader)
    self.r_obj = r_obj
    r_obj.clear_scene()

    # Load building env class.
    self.buildings = []
    wt = []
    for i, building_name in enumerate(self.building_names):
      b = self.env_class(robot=self.robot, env=self.env,
                         task_params=self.task_params,
                         building_name=building_name, flip=self.flip[i],
                         logdir=self.logdir, building_loader=self.dataset,
                         r_obj=r_obj)
      wt.append(b.get_weight())
      b.load_building_into_scene()
      b.set_building_visibility(False)
      self.buildings.append(b)
    wt = np.array(wt).astype(np.float32)
    wt = wt / np.sum(wt + 0.0001)
    self.building_sampling_weights = wt

  def sample_building(self, rng):
    if self.num_buildings == 1:
      building_id = rng.choice(range(len(self.building_names)))
    else:
      building_id = rng.choice(self.num_buildings,
                               p=self.building_sampling_weights)
    b = self.buildings[building_id]
    instances = b._gen_rng(rng)
    self._building_id = building_id
    return self.buildings[building_id], instances

  def sample_env(self, rngs):
    rng = rngs[0]
    if self.num_buildings == 1:
      building_id = rng.choice(range(len(self.building_names)))
    else:
      building_id = rng.choice(self.num_buildings,
                               p=self.building_sampling_weights)
    return self.buildings[building_id]

  def pre(self, inputs):
    return self.buildings[self._building_id].pre(inputs)

  def __del__(self):
    self.r_obj.clear_scene()
    logging.error('Clearing scene.')
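During mapping, buildings are sampled proportionally to get_weight(), i.e. their node count. A quick check of the normalized-weight draw used in sample_building (the weights below are made up; the class above also folds a tiny epsilon into the normalization):

import numpy as np

wt = np.array([1200., 800., 400.])   # get_weight() per building
wt = wt / wt.sum()
rng = np.random.RandomState(0)
draws = rng.choice(len(wt), size=1000, p=wt)
print(np.bincount(draws, minlength=3))  # roughly 500 / 333 / 167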
cognitive_mapping_and_planning/datasets/nav_env_config.py
0 → 100644
View file @ 44fa1d37
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configs for stanford navigation environment.
Base config for stanford navigation enviornment.
"""
import numpy as np
import src.utils as utils
import datasets.nav_env as nav_env
def nav_env_base_config():
  """Returns the base config for stanford navigation environment.

  Returns:
    Base config for stanford navigation environment.
  """
  robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120,
                    camera_elevation_degree=-15)

  env = utils.Foo(padding=10, resolution=5, num_point_threshold=2,
                  valid_min=-10, valid_max=200, n_samples_per_face=200)

  camera_param = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0,
                           fov=60., modalities=['rgb'], img_channels=3)

  data_augment = utils.Foo(lr_flip=0, delta_angle=0.5, delta_xy=4,
                           relight=True, relight_fast=False,
                           structured=False)  # if True, uses the same perturb
                                              # for the whole episode.

  outputs = utils.Foo(images=True, rel_goal_loc=False, loc_on_map=True,
                      gt_dist_to_goal=True, ego_maps=False,
                      ego_goal_imgs=False, egomotion=False, visit_count=False,
                      analytical_counts=False, node_ids=True,
                      readout_maps=False)

  # class_map_names=['board', 'chair', 'door', 'sofa', 'table']
  class_map_names = ['chair', 'door', 'table']
  semantic_task = utils.Foo(class_map_names=class_map_names, pix_distance=16,
                            sampling='uniform')

  # Time per iteration for cmp is 0.82 seconds per episode with 3.4s overhead
  # per batch.
  task_params = utils.Foo(max_dist=32, step_size=8, num_steps=40,
                          num_actions=4, batch_size=4, building_seed=0,
                          num_goals=1, img_height=None, img_width=None,
                          img_channels=None, modalities=None, outputs=outputs,
                          map_scales=[1.], map_crop_sizes=[64],
                          rel_goal_loc_dim=4, base_class='Building',
                          task='map+plan', n_ori=4, type='room_to_room_many',
                          data_augment=data_augment,
                          room_regex='^((?!hallway).)*$', toy_problem=False,
                          map_channels=1, gt_coverage=False,
                          input_type='maps', full_information=False,
                          aux_delta_thetas=[], semantic_task=semantic_task,
                          num_history_frames=0, node_ids_dim=1,
                          perturbs_dim=4,
                          map_resize_method='linear_noantialiasing',
                          readout_maps_channels=1, readout_maps_scales=[],
                          readout_maps_crop_sizes=[], n_views=1,
                          reward_time_penalty=0.1, reward_at_goal=1.,
                          discount_factor=0.99, rejection_sampling_M=100,
                          min_dist=None)

  navtask_args = utils.Foo(
      building_names=['area1_gates_wingA_floor1_westpart'],
      env_class=nav_env.VisualNavigationEnv, robot=robot,
      task_params=task_params, env=env, camera_param=camera_param,
      cache_rooms=True)
  return navtask_args
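A hypothetical usage sketch: training scripts start from this base config and override fields before constructing the environment (the particular overrides below are illustrative, not from the repo):

args = nav_env_base_config()
args.task_params.batch_size = 8
args.task_params.modalities = args.camera_param.modalities
args.task_params.img_height = args.camera_param.height
args.task_params.img_width = args.camera_param.width
args.task_params.img_channels = args.camera_param.img_channels
print(args.building_names, args.task_params.type)
# ['area1_gates_wingA_floor1_westpart'] room_to_room_many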
cognitive_mapping_and_planning/matplotlibrc
0 → 100644
View file @ 44fa1d37
backend : agg