"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "f44db16c8e0fbee1b964e802f1ab493afb6f7996"
Commit f5fc733a authored by Byzantine's avatar Byzantine
Browse files

Removing research/community models

parent 09bc9f54
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities for preprocessing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
slim = tf.contrib.slim
def preprocess_image(image, output_height, output_width, is_training):
  """Preprocesses the given image.

  Crops or zero-pads the image to the output size, then rescales pixel
  values from [0, 255] to approximately [-1, 1].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise. (Currently unused: the same pipeline is applied in
      both modes.)

  Returns:
    A preprocessed image.
  """
  image = tf.to_float(image)
  # BUG FIX: resize_image_with_crop_or_pad's signature is
  # (image, target_height, target_width); the original call passed the width
  # first, which produced transposed crops for non-square output sizes.
  image = tf.image.resize_image_with_crop_or_pad(
      image, output_height, output_width)
  # Center and scale: (x - 128) / 128 maps [0, 255] to roughly [-1, 1].
  image = tf.subtract(image, 128.0)
  image = tf.div(image, 128.0)
  return image
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a factory for building various models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from preprocessing import cifarnet_preprocessing
from preprocessing import inception_preprocessing
from preprocessing import lenet_preprocessing
from preprocessing import vgg_preprocessing
slim = tf.contrib.slim
def get_preprocessing(name, is_training=False):
  """Returns preprocessing_fn(image, height, width, **kwargs).

  Args:
    name: The name of the preprocessing function.
    is_training: `True` if the model is being used for training and `False`
      otherwise.

  Returns:
    preprocessing_fn: A function that preprocessing a single image (pre-batch).
      It has the following signature:
        image = preprocessing_fn(image, output_height, output_width, ...).

  Raises:
    ValueError: If Preprocessing `name` is not recognized.
  """
  # Model families that share a preprocessing module.
  inception_family = (
      'inception', 'inception_v1', 'inception_v2', 'inception_v3',
      'inception_v4', 'inception_resnet_v2', 'mobilenet_v1',
      'nasnet_mobile', 'nasnet_large', 'pnasnet_large')
  vgg_family = (
      'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v1_200',
      'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'resnet_v2_200',
      'vgg', 'vgg_a', 'vgg_16', 'vgg_19')

  preprocessing_fn_map = {
      'cifarnet': cifarnet_preprocessing,
      'lenet': lenet_preprocessing,
  }
  preprocessing_fn_map.update({m: inception_preprocessing
                               for m in inception_family})
  preprocessing_fn_map.update({m: vgg_preprocessing for m in vgg_family})

  module = preprocessing_fn_map.get(name)
  if module is None:
    raise ValueError('Preprocessing name [%s] was not recognized' % name)

  def preprocessing_fn(image, output_height, output_width, **kwargs):
    # Bind the chosen module and training flag into the returned closure.
    return module.preprocess_image(
        image, output_height, output_width, is_training=is_training, **kwargs)

  return preprocessing_fn
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.
The preprocessing steps for VGG were introduced in the following technical
report:
Very Deep Convolutional Networks For Large-Scale Image Recognition
Karen Simonyan and Andrew Zisserman
arXiv technical report, 2015
PDF: http://arxiv.org/pdf/1409.1556.pdf
ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
CC-BY-4.0
More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
slim = tf.contrib.slim

# Per-channel means (RGB order) subtracted from images during preprocessing;
# these are the ImageNet training-set means used by the original VGG models.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# Bounds on the length of the smallest image side after the aspect-preserving
# resize; training samples a side length uniformly from this range.
_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512
def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Extracts a `crop_height` x `crop_width` window from `image`.

  The input image's size may be unknown at graph-construction time, but its
  rank must be 3.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    The cropped image.

  Raises:
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  input_shape = tf.shape(image)
  assert_rank = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  # Only read the channel count after the rank check has run.
  with tf.control_dependencies([assert_rank]):
    target_shape = tf.stack([crop_height, crop_width, input_shape[2]])

  assert_size = tf.Assert(
      tf.logical_and(
          tf.greater_equal(input_shape[0], crop_height),
          tf.greater_equal(input_shape[1], crop_width)),
      ['Crop size greater than the image size.'])
  begin = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # tf.slice (unlike crop_to_bounding_box) accepts tensors for the crop size.
  with tf.control_dependencies([assert_size]):
    window = tf.slice(image, begin, target_shape)
  return tf.reshape(window, target_shape)
def _random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:
    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions. Each image must be rank 3 (H, W, C).
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  # Read the reference shape from the first image, gated on its rank check.
  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  # Every subsequent image must match the first image's height and width.
  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  # The reshape to a scalar is gated on all assertions so no crop is computed
  # from unvalidated shapes.
  with tf.control_dependencies(asserts):
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
  with tf.control_dependencies(asserts):
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  # Apply the single sampled offset to every image so crops stay aligned.
  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
def _central_crop(image_list, crop_height, crop_width):
  """Performs central crops of the given image list.

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the height of the image following the crop.
    crop_width: the width of the image following the crop.

  Returns:
    the list of cropped images.
  """
  cropped_images = []
  for img in image_list:
    img_shape = tf.shape(img)
    # Center the crop window inside the image.
    top = (img_shape[0] - crop_height) / 2
    left = (img_shape[1] - crop_width) / 2
    cropped_images.append(_crop(img, top, left, crop_height, crop_width))
  return cropped_images
def _mean_image_subtraction(image, means):
  """Subtracts the given means from each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  # Split into per-channel slices, shift each by its mean, and reassemble.
  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  centered = [channel - mean for channel, mean in zip(channels, means)]
  return tf.concat(axis=2, values=centered)
def _smallest_size_at_least(height, width, smallest_side):
  """Computes new shape with the smallest side equal to `smallest_side`.

  Computes new shape with the smallest side equal to `smallest_side` while
  preserving the original aspect ratio.

  Args:
    height: an int32 scalar tensor indicating the current height.
    width: an int32 scalar tensor indicating the current width.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    new_height: an int32 scalar tensor indicating the new height.
    new_width: and int32 scalar tensor indicating the new width.
  """
  side = tf.to_float(tf.convert_to_tensor(smallest_side, dtype=tf.int32))
  height_f = tf.to_float(height)
  width_f = tf.to_float(width)

  # Scale so that the smaller of the two dimensions becomes `smallest_side`.
  scale = tf.cond(tf.greater(height_f, width_f),
                  lambda: side / width_f,
                  lambda: side / height_f)
  new_height = tf.to_int32(tf.rint(height_f * scale))
  new_width = tf.to_int32(tf.rint(width_f * scale))
  return new_height, new_width
def _aspect_preserving_resize(image, smallest_side):
  """Resize images preserving the original aspect ratio.

  Args:
    image: A 3-D image `Tensor`.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    resized_image: A 3-D tensor containing the resized image.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)
  input_shape = tf.shape(image)
  new_height, new_width = _smallest_size_at_least(
      input_shape[0], input_shape[1], smallest_side)

  # resize_bilinear works on batches, so add and then strip a batch dimension.
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_bilinear(batched, [new_height, new_width],
                                     align_corners=False)
  resized = tf.squeeze(resized)
  # Only the channel count is statically known after resizing.
  resized.set_shape([None, None, 3])
  return resized
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
  [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  # Sample a target side length from the inclusive range [min, max].
  sampled_side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32)

  image = _aspect_preserving_resize(image, sampled_side)
  image = _random_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  image = tf.image.random_flip_left_right(image)
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_for_eval(image, output_height, output_width, resize_side):
  """Preprocesses the given image for evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side: The smallest side of the image for aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  # Deterministic pipeline: fixed resize, centered crop, mean subtraction.
  resized = _aspect_preserving_resize(image, resize_side)
  cropped = _central_crop([resized], output_height, output_width)[0]
  cropped.set_shape([output_height, output_width, 3])
  cropped = tf.to_float(cropped)
  return _mean_image_subtraction(cropped, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_image(image, output_height, output_width, is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, then this value
      is used for rescaling.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, this value is
      ignored. Otherwise, the resize side is sampled from
      [resize_size_min, resize_size_max].

  Returns:
    A preprocessed image.
  """
  # Evaluation uses the deterministic pipeline with a fixed resize side.
  if not is_training:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min)
  return preprocess_for_train(image, output_height, output_width,
                              resize_side_min, resize_side_max)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains classes specifying naming conventions used for object detection.
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
DetectionResultFields: standard fields returned by object detector.
BoxListFields: standard fields used by BoxList
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
class InputDataFields(object):
  """Names for the input tensors.

  Holds the standard data field names to use for identifying input tensors. This
  should be used by the decoder to identify keys for the returned tensor_dict
  containing input tensors. And it should be used by the model to identify the
  tensors it needs.

  Attributes:
    image: image.
    image_additional_channels: additional channels.
    original_image: image in the original input size.
    key: unique key corresponding to image.
    source_id: source of the original image.
    filename: original filename of the dataset (without common path).
    groundtruth_image_classes: image-level class labels.
    groundtruth_boxes: coordinates of the ground truth boxes in the image.
    groundtruth_classes: box-level class labels.
    groundtruth_label_types: box-level label types (e.g. explicit negative).
    groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
      is the groundtruth a single object or a crowd.
    groundtruth_area: area of a groundtruth segment.
    groundtruth_difficult: is a `difficult` object
    groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
      same class, forming a connected group, where instances are heavily
      occluding each other.
    proposal_boxes: coordinates of object proposal boxes.
    proposal_objectness: objectness score of each proposal.
    groundtruth_instance_masks: ground truth instance masks.
    groundtruth_instance_boundaries: ground truth instance boundaries.
    groundtruth_instance_classes: instance mask-level class labels.
    groundtruth_keypoints: ground truth keypoints.
    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
    groundtruth_label_scores: groundtruth label scores.
    groundtruth_weights: groundtruth weight factor for bounding boxes.
    num_groundtruth_boxes: number of groundtruth boxes.
    true_image_shape: true shapes of images in the resized images, as resized
      images can be padded with zeros.
    multiclass_scores: the label score per class for each box.
  """
  # Each attribute below is the canonical string key used in the input
  # tensor_dict produced by decoders and consumed by models.
  image = 'image'
  image_additional_channels = 'image_additional_channels'
  original_image = 'original_image'
  key = 'key'
  source_id = 'source_id'
  filename = 'filename'
  groundtruth_image_classes = 'groundtruth_image_classes'
  groundtruth_boxes = 'groundtruth_boxes'
  groundtruth_classes = 'groundtruth_classes'
  groundtruth_label_types = 'groundtruth_label_types'
  groundtruth_is_crowd = 'groundtruth_is_crowd'
  groundtruth_area = 'groundtruth_area'
  groundtruth_difficult = 'groundtruth_difficult'
  groundtruth_group_of = 'groundtruth_group_of'
  proposal_boxes = 'proposal_boxes'
  proposal_objectness = 'proposal_objectness'
  groundtruth_instance_masks = 'groundtruth_instance_masks'
  groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
  groundtruth_instance_classes = 'groundtruth_instance_classes'
  groundtruth_keypoints = 'groundtruth_keypoints'
  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
  groundtruth_label_scores = 'groundtruth_label_scores'
  groundtruth_weights = 'groundtruth_weights'
  num_groundtruth_boxes = 'num_groundtruth_boxes'
  true_image_shape = 'true_image_shape'
  multiclass_scores = 'multiclass_scores'
class DetectionResultFields(object):
  """Naming conventions for storing the output of the detector.

  Attributes:
    source_id: source of the original image.
    key: unique key corresponding to image.
    detection_boxes: coordinates of the detection boxes in the image.
    detection_scores: detection scores for the detection boxes in the image.
    detection_classes: detection-level class labels.
    detection_masks: contains a segmentation mask for each detection box.
    detection_boundaries: contains an object boundary for each detection box.
    detection_keypoints: contains detection keypoints for each detection box.
    num_detections: number of detections in the batch.
  """
  # Canonical string keys for the detector's output tensor dictionary.
  source_id = 'source_id'
  key = 'key'
  detection_boxes = 'detection_boxes'
  detection_scores = 'detection_scores'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
  detection_boundaries = 'detection_boundaries'
  detection_keypoints = 'detection_keypoints'
  num_detections = 'num_detections'
class BoxListFields(object):
  """Naming conventions for BoxLists.

  Attributes:
    boxes: bounding box coordinates.
    classes: classes per bounding box.
    scores: scores per bounding box.
    weights: sample weights per bounding box.
    objectness: objectness score per bounding box.
    masks: masks per bounding box.
    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
    is_crowd: is_crowd annotation per bounding box.
  """
  # Canonical string keys for per-box fields stored in a BoxList.
  boxes = 'boxes'
  classes = 'classes'
  scores = 'scores'
  weights = 'weights'
  objectness = 'objectness'
  masks = 'masks'
  boundaries = 'boundaries'
  keypoints = 'keypoints'
  keypoint_heatmaps = 'keypoint_heatmaps'
  is_crowd = 'is_crowd'
class TfExampleFields(object):
  """TF-example proto feature names for object detection.

  Holds the standard feature names to load from an Example proto for object
  detection.

  Attributes:
    image_encoded: JPEG encoded string
    image_format: image format, e.g. "JPEG"
    filename: filename
    channels: number of channels of image
    colorspace: colorspace, e.g. "RGB"
    height: height of image in pixels, e.g. 462
    width: width of image in pixels, e.g. 581
    source_id: original source of the image
    image_class_text: image-level label in text format
    image_class_label: image-level label in numerical format
    object_class_text: labels in text format, e.g. ["person", "cat"]
    object_class_label: labels in numbers, e.g. [16, 8]
    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
    object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
    object_view: viewpoint of object, e.g. ["frontal", "left"]
    object_truncated: is object truncated, e.g. [true, false]
    object_occluded: is object occluded, e.g. [true, false]
    object_difficult: is object difficult, e.g. [true, false]
    object_group_of: is object a single object or a group of objects
    object_depiction: is object a depiction
    object_is_crowd: [DEPRECATED, use object_group_of instead]
      is the object a single object or a crowd
    object_segment_area: the area of the segment.
    object_weight: a weight factor for the object's bounding box.
    instance_masks: instance segmentation masks.
    instance_boundaries: instance boundaries.
    instance_classes: Classes for each instance segmentation mask.
    detection_class_label: class label in numbers.
    detection_bbox_ymin: ymin coordinates of a detection box.
    detection_bbox_xmin: xmin coordinates of a detection box.
    detection_bbox_ymax: ymax coordinates of a detection box.
    detection_bbox_xmax: xmax coordinates of a detection box.
    detection_score: detection score for the class label and box.
  """
  # Feature keys follow the 'image/...' namespace convention of tf.Example.
  image_encoded = 'image/encoded'
  image_format = 'image/format'  # format is reserved keyword
  filename = 'image/filename'
  channels = 'image/channels'
  colorspace = 'image/colorspace'
  height = 'image/height'
  width = 'image/width'
  source_id = 'image/source_id'
  image_class_text = 'image/class/text'
  image_class_label = 'image/class/label'
  object_class_text = 'image/object/class/text'
  object_class_label = 'image/object/class/label'
  object_bbox_ymin = 'image/object/bbox/ymin'
  object_bbox_xmin = 'image/object/bbox/xmin'
  object_bbox_ymax = 'image/object/bbox/ymax'
  object_bbox_xmax = 'image/object/bbox/xmax'
  object_view = 'image/object/view'
  object_truncated = 'image/object/truncated'
  object_occluded = 'image/object/occluded'
  object_difficult = 'image/object/difficult'
  object_group_of = 'image/object/group_of'
  object_depiction = 'image/object/depiction'
  object_is_crowd = 'image/object/is_crowd'
  object_segment_area = 'image/object/segment/area'
  object_weight = 'image/object/weight'
  instance_masks = 'image/segmentation/object'
  instance_boundaries = 'image/boundaries/object'
  instance_classes = 'image/segmentation/object/class'
  detection_class_label = 'image/detection/label'
  detection_bbox_ymin = 'image/detection/bbox/ymin'
  detection_bbox_xmin = 'image/detection/bbox/xmin'
  detection_bbox_ymax = 'image/detection/bbox/ymax'
  detection_bbox_xmax = 'image/detection/bbox/xmax'
  detection_score = 'image/detection/score'
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: object_detection/protos/string_int_label_map.proto
import sys
# Py2/Py3 shim emitted by protoc: on Python 3 the serialized descriptor string
# below must be encoded to bytes via latin1; on Python 2 it already is bytes.
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()

# File-level descriptor built from the wire-encoded FileDescriptorProto.
# NOTE(review): generated by protoc from string_int_label_map.proto — do not
# hand-edit the serialized bytes.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='object_detection/protos/string_int_label_map.proto',
  package='object_detection.protos',
  syntax='proto2',
  serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem')
)
# Descriptor for the StringIntLabelMapItem message: an optional string `name`,
# an optional int32 `id`, and an optional string `display_name`.
# NOTE(review): protoc-generated; field numbers/types mirror the .proto source.
_STRINGINTLABELMAPITEM = _descriptor.Descriptor(
  name='StringIntLabelMapItem',
  full_name='object_detection.protos.StringIntLabelMapItem',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    _descriptor.FieldDescriptor(
      name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1,
      number=2, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    _descriptor.FieldDescriptor(
      name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  syntax='proto2',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte offsets of this message within the serialized file descriptor.
  serialized_start=79,
  serialized_end=150,
)
# Descriptor for the StringIntLabelMap message: a repeated `item` field of
# StringIntLabelMapItem (message_type is patched in after both descriptors
# exist, since this one is defined first).
# NOTE(review): protoc-generated.
_STRINGINTLABELMAP = _descriptor.Descriptor(
  name='StringIntLabelMap',
  full_name='object_detection.protos.StringIntLabelMap',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  syntax='proto2',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte offsets of this message within the serialized file descriptor.
  serialized_start=152,
  serialized_end=233,
)
# Resolve the forward reference: StringIntLabelMap.item holds
# StringIntLabelMapItem messages.
_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM
# Register both message types on the file descriptor and symbol database so
# they can be looked up by fully-qualified name.
DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM
DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete Python message classes synthesized from the descriptors above.
StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict(
  DESCRIPTOR = _STRINGINTLABELMAPITEM,
  __module__ = 'object_detection.protos.string_int_label_map_pb2'
  # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem)
  ))
_sym_db.RegisterMessage(StringIntLabelMapItem)

StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict(
  DESCRIPTOR = _STRINGINTLABELMAP,
  __module__ = 'object_detection.protos.string_int_label_map_pb2'
  # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap)
  ))
_sym_db.RegisterMessage(StringIntLabelMap)
# @@protoc_insertion_point(module_scope)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A library of tasks.
This interface is intended to implement a wide variety of navigation
tasks. See go/navigation_tasks for a list.
"""
import abc
import collections
import math
import threading
import networkx as nx
import numpy as np
import tensorflow as tf
#from pyglib import logging
#import gin
from envs import task_env
from envs import util as envs_util
# Utility functions.
def _pad_or_clip_array(np_arr, arr_len, is_front_clip=True, output_mask=False):
"""Make np_arr array to have length arr_len.
If the array is shorter than arr_len, then it is padded from the front with
zeros. If it is longer, then it is clipped either from the back or from the
front. Only the first dimension is modified.
Args:
np_arr: numpy array.
arr_len: integer scalar.
is_front_clip: a boolean. If true then clipping is done in the front,
otherwise in the back.
output_mask: If True, outputs a numpy array of rank 1 which represents
a mask of which values have been added (0 - added, 1 - actual output).
Returns:
A numpy array and the size of padding (as a python int32). This size is
negative is the array is clipped.
"""
shape = list(np_arr.shape)
pad_size = arr_len - shape[0]
padded_or_clipped = None
if pad_size < 0:
if is_front_clip:
padded_or_clipped = np_arr[-pad_size:, :]
else:
padded_or_clipped = np_arr[:arr_len, :]
elif pad_size > 0:
padding = np.zeros([pad_size] + shape[1:], dtype=np_arr.dtype)
padded_or_clipped = np.concatenate([np_arr, padding], axis=0)
else:
padded_or_clipped = np_arr
if output_mask:
mask = np.ones((arr_len,), dtype=np.int)
if pad_size > 0:
mask[-pad_size:] = 0
return padded_or_clipped, pad_size, mask
else:
return padded_or_clipped, pad_size
def classification_loss(truth, predicted, weights=None, is_one_hot=True):
  """A cross entropy loss.

  Computes the mean of cross entropy losses for all pairs of true labels and
  predictions. It wraps around a tf implementation of the cross entropy loss
  with additional reformatting of the inputs. If the truth and predicted are
  n-rank Tensors with n > 2, then these are reshaped to 2-rank Tensors. It
  allows for truth to be specified as a one hot vector or class indices.
  Finally, a weight can be specified for each element in truth and predicted.

  Args:
    truth: an n-rank or (n-1)-rank Tensor containing labels. If is_one_hot is
      True, then n-rank Tensor is expected, otherwise (n-1) rank one.
    predicted: an n-rank float Tensor containing prediction probabilities.
    weights: an (n-1)-rank float Tensor of weights
    is_one_hot: a boolean.

  Returns:
    A TF float scalar.
  """
  num_labels = predicted.get_shape().as_list()[-1]
  # Flatten both tensors to rank 2: [num_examples, num_labels].
  flat_predictions = tf.reshape(predicted, [-1, num_labels])
  if is_one_hot:
    flat_truth = tf.reshape(truth, [-1, num_labels])
  else:
    # Convert class indices into one-hot rows before computing the loss.
    flat_truth = tf.one_hot(
        tf.reshape(truth, [-1]),
        depth=num_labels,
        on_value=1.0,
        off_value=0.0,
        axis=-1)
  losses = tf.nn.softmax_cross_entropy_with_logits(
      labels=flat_truth, logits=flat_predictions)
  if weights is not None:
    # Keep only the losses whose weight is truthy.
    keep = tf.cast(tf.reshape(weights, [-1]), dtype=tf.bool)
    losses = tf.boolean_mask(losses, keep)
  return tf.reduce_mean(losses)
class UnrolledTaskIOConfig(object):
  """Configuration of task inputs and outputs.

  A task can have multiple inputs, which define the context, and a task query
  which defines what is to be executed in this context. The desired execution
  is encoded in an output. The config defines the shapes of the inputs, the
  query and the outputs.
  """

  def __init__(self, inputs, output, query=None):
    """Constructs a Task input/output config.

    Args:
      inputs: an OrderedDict mapping modality type to a tuple. Each tuple
        represents the configuration of an input, with first element being the
        type (a tf.DType) and the second element the shape (a list).
      output: a tuple representing the configuration of the output.
      query: a tuple representing the configuration of the query. If no query,
        then None.

    Raises:
      ValueError: if any configuration tuple is malformed.
    """
    # A configuration of a single input, output or query. Consists of the
    # type, which can be one of the three specified above, and a shape. The
    # shape must be consistent with the type, e.g. if type == 'image', then
    # shape is a 3 valued list.
    io_config = collections.namedtuple('IOConfig', ['type', 'shape'])

    def assert_config(config):
      """Raises ValueError unless config is a (tf.DType, list) tuple."""
      if not isinstance(config, tuple):
        raise ValueError('config must be a tuple. Received {}'.format(
            type(config)))
      if len(config) != 2:
        raise ValueError('config must have 2 elements, has %d' % len(config))
      if not isinstance(config[0], tf.DType):
        raise ValueError('First element of config must be a tf.DType.')
      if not isinstance(config[1], list):
        raise ValueError('Second element of config must be a list.')

    assert isinstance(inputs, collections.OrderedDict)
    for modality_type in inputs:
      assert_config(inputs[modality_type])
    # .items() (not the Python 2-only .iteritems()) works on both Python 2
    # and Python 3.
    self._inputs = collections.OrderedDict(
        [(k, io_config(*value)) for k, value in inputs.items()])

    if query is not None:
      assert_config(query)
      self._query = io_config(*query)
    else:
      self._query = None

    assert_config(output)
    self._output = io_config(*output)

  @property
  def inputs(self):
    return self._inputs

  @property
  def output(self):
    return self._output

  @property
  def query(self):
    return self._query
class UnrolledTask(object):
  """An interface for a Task which can be unrolled during training.

  Each example is called an episode and consists of inputs and target output,
  where the output can be considered as the desired unrolled sequence of
  actions for the inputs. For the specified tasks, these action sequences are
  to be unambiguously definable.
  """
  __metaclass__ = abc.ABCMeta

  def __init__(self, config):
    """Initializes the task.

    Args:
      config: an UnrolledTaskIOConfig describing the input/query/output
        types and shapes of this task.
    """
    assert isinstance(config, UnrolledTaskIOConfig)
    self._config = config
    # A dict of bookkeeping variables.
    self.info = {}
    # Tensorflow input is multithreaded and this lock is needed to prevent
    # race condition in the environment. Without the lock, non-thread safe
    # environments crash.
    self._lock = threading.Lock()

  @property
  def config(self):
    return self._config

  @abc.abstractmethod
  def episode(self):
    """Returns data needed to train and test a single episode.

    Each episode consists of inputs, which define the context of the task, a
    query which defines the task, and a target output, which defines a
    sequence of actions to be executed for this query. This sequence should
    not require feedback, i.e. can be predicted purely from input and query.

    Returns:
      inputs, query, output, where inputs is a list of numpy arrays and query
      and output are numpy arrays. These arrays must be of shape and type as
      specified in the task configuration.
    """
    pass

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def episode_batch(self, batch_size):
    """Returns a batch of episodes.

    Args:
      batch_size: size of batch.

    Returns:
      (inputs, query, output, masks) where inputs is list of numpy arrays and
      query, output, and mask are numpy arrays. These arrays must be of shape
      and type as specified in the task configuration with one additional
      preceding dimension corresponding to the batch.

    Raises:
      ValueError: if self.episode() returns illegal values.
    """
    batched_inputs = collections.OrderedDict(
        [[mtype, []] for mtype in self.config.inputs])
    batched_queries = []
    batched_outputs = []
    batched_masks = []
    for _ in range(int(batch_size)):
      # The episode function needs to be thread-safe. Since the current
      # implementations of the envs are not thread safe we need to lock the
      # episode generation here.
      with self._lock:
        inputs, query, outputs = self.episode()
      if not isinstance(outputs, tuple):
        raise ValueError('Outputs return value must be tuple.')
      if len(outputs) != 2:
        raise ValueError('Output tuple must be of size 2.')
      if inputs is not None:
        for modality_type in batched_inputs:
          batched_inputs[modality_type].append(
              np.expand_dims(inputs[modality_type], axis=0))
      if query is not None:
        batched_queries.append(np.expand_dims(query, axis=0))
      batched_outputs.append(np.expand_dims(outputs[0], axis=0))
      if outputs[1] is not None:
        batched_masks.append(np.expand_dims(outputs[1], axis=0))
    # .items() (not the Python 2-only .iteritems()) works on both Python 2
    # and Python 3.
    batched_inputs = {
        k: np.concatenate(i, axis=0) for k, i in batched_inputs.items()
    }
    if batched_queries:
      batched_queries = np.concatenate(batched_queries, axis=0)
    batched_outputs = np.concatenate(batched_outputs, axis=0)
    if batched_masks:
      batched_masks = np.concatenate(batched_masks, axis=0).astype(np.float32)
    else:
      # When the array is empty, the default np.dtype is float64 which causes
      # py_func to crash in the tests.
      batched_masks = np.array([], dtype=np.float32)
    batched_inputs = [batched_inputs[k] for k in self._config.inputs]
    return batched_inputs, batched_queries, batched_outputs, batched_masks

  def tf_episode_batch(self, batch_size):
    """A batch of episodes as TF Tensors.

    Same as episode_batch with the difference that the return values are TF
    Tensors.

    Args:
      batch_size: a python float for the batch size.

    Returns:
      inputs, query, output, mask where inputs is a dictionary of tf.Tensor
      where the keys are the modality types specified in the config.inputs.
      query, output, and mask are TF Tensors. These tensors must
      be of shape and type as specified in the task configuration with one
      additional preceding dimension corresponding to the batch. Both mask and
      output have the same shape as output.
    """
    # Define TF output types and shapes: one entry per input modality,
    # optionally followed by the query, then the output and its mask.
    touts = []
    shapes = []
    for _, i in self._config.inputs.items():
      touts.append(i.type)
      shapes.append(i.shape)
    if self._config.query is not None:
      touts.append(self._config.query.type)
      shapes.append(self._config.query.shape)
    # Shapes and types for batched_outputs.
    touts.append(self._config.output.type)
    shapes.append(self._config.output.shape)
    # Shapes and types for batched_masks.
    touts.append(self._config.output.type)
    shapes.append(self._config.output.shape[0:1])

    def episode_batch_func():
      if self.config.query is None:
        inp, _, output, masks = self.episode_batch(int(batch_size))
        return tuple(inp) + (output, masks)
      else:
        inp, query, output, masks = self.episode_batch(int(batch_size))
        return tuple(inp) + (query, output, masks)

    tf_episode_batch = tf.py_func(episode_batch_func, [], touts,
                                  stateful=True, name='taskdata')
    # py_func loses static shape information; restore it with the configured
    # shapes plus the leading batch dimension.
    for episode, shape in zip(tf_episode_batch, shapes):
      episode.set_shape([batch_size] + shape)

    tf_episode_batch_dict = collections.OrderedDict([
        (mtype, episode)
        for mtype, episode in zip(self.config.inputs.keys(), tf_episode_batch)
    ])
    cur_index = len(self.config.inputs.keys())
    tf_query = None
    if self.config.query is not None:
      tf_query = tf_episode_batch[cur_index]
      cur_index += 1
    tf_outputs = tf_episode_batch[cur_index]
    tf_masks = tf_episode_batch[cur_index + 1]

    return tf_episode_batch_dict, tf_query, tf_outputs, tf_masks

  @abc.abstractmethod
  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of shape and type as defined in the task config
        containing the true outputs.
      targets: tf.Tensor of shape and type as defined in the task config
        containing the predicted outputs.
      weights: a bool tf.Tensor of shape as targets. Only true values are
        considered when formulating the loss.
    """
    pass

  def reward(self, obs, done, info):
    """Returns a reward.

    The task has to compute a reward based on the state of the environment.
    The reward computation, though, is task specific. The task is to use the
    environment interface, as defined in task_env.py, to compute the reward.
    If this interface does not expose enough information, it is to be updated.

    Args:
      obs: Observation from environment's step function.
      done: Done flag from environment's step function.
      info: Info dict from environment's step function.

    Returns:
      obs: Observation.
      reward: Floating point value.
      done: Done flag.
      info: Info dict.
    """
    # Default implementation does not do anything.
    return obs, 0.0, done, info
class RandomExplorationBasedTask(UnrolledTask):
  """A Task which starts with a random exploration of the environment."""

  def __init__(self,
               env,
               seed,
               add_query_noise=False,
               query_noise_var=0.0,
               *args,
               **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Initializes a Task using random exploration runs.

    Args:
      env: an instance of type TaskEnv and gym.Env.
      seed: a random seed.
      add_query_noise: boolean, if True then whatever queries are generated,
        they are randomly perturbed. The semantics of the queries depends on
        the concrete task implementation.
      query_noise_var: float, the variance of Gaussian noise used for query
        perturbation. Used iff add_query_noise==True.
      *args: see super class.
      **kwargs: see super class.
    """
    super(RandomExplorationBasedTask, self).__init__(*args, **kwargs)
    assert isinstance(env, task_env.TaskEnv)
    self._env = env
    self._env.set_task(self)
    self._rng = np.random.RandomState(seed)
    self._add_query_noise = add_query_noise
    self._query_noise_var = query_noise_var
    # GoToStaticXTask can also take empty config but for the rest of the
    # classes the number of modality types is 1.
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  def _exploration(self):
    """Generates a random exploration run.

    The function uses the environment to generate a run.

    Returns:
      A dict keyed by modality type of numpy arrays, each of type and shape
      as specified in config.inputs.
      A list of states along the exploration path.
      A list of vertex indices corresponding to the path of the exploration.
    """
    # list(...) around .values() keeps this working on Python 3, where dict
    # views are not indexable (the original relied on Python 2 lists).
    in_seq_len = list(self._config.inputs.values())[0].shape[0]
    path, _, states, step_outputs = self._env.random_step_sequence(
        min_len=in_seq_len)
    obs = {modality_type: [] for modality_type in self._config.inputs}
    for o in step_outputs:
      step_obs, _, done, _ = o
      # It is expected that each value of step_obs is a dict of observations,
      # whose dimensions are consistent with the config.inputs sizes.
      for modality_type in self._config.inputs:
        assert modality_type in step_obs, '{}'.format(type(step_obs))
        o = step_obs[modality_type]
        i = self._config.inputs[modality_type]
        assert len(o.shape) == len(i.shape) - 1
        for dim_o, dim_i in zip(o.shape, i.shape[1:]):
          assert dim_o == dim_i, '{} != {}'.format(dim_o, dim_i)
        obs[modality_type].append(o)
      if done:
        break

    if not obs:
      return obs, states, path

    # Clip the path proportionally to the number of kept observations.
    max_path_len = int(
        round(in_seq_len * float(len(path)) /
              float(len(list(obs.values())[0]))))
    path = path[-max_path_len:]
    states = states[-in_seq_len:]

    # The above obs is a dict of lists of np.array. Re-format it as a dict of
    # np.array, each array containing all observations from all steps.
    def regroup(obs, i):
      """Regroups observations.

      Args:
        obs: a dict of lists of observations keyed by modality type. The k-th
          element of each list is the observation from the k-th step.
        i: the modality type whose observations are to be grouped.

      Returns:
        A numpy array of shape config.inputs[i] which contains all i-th
        observations from all steps. These are concatenated along the first
        dimension. In addition, if the number of observations is different
        from the one specified in config.inputs[i].shape[0], then the array is
        either padded from front or clipped.
      """
      grouped_obs = np.concatenate(
          [np.expand_dims(o, axis=0) for o in obs[i]], axis=0)
      in_seq_len = self._config.inputs[i].shape[0]
      # pylint: disable=unbalanced-tuple-unpacking
      grouped_obs, _ = _pad_or_clip_array(
          grouped_obs, in_seq_len, is_front_clip=True)
      return grouped_obs

    all_obs = {i: regroup(obs, i) for i in self._config.inputs}

    return all_obs, states, path

  def _obs_to_state(self, path, states):
    """Computes mapping between path nodes and states.

    Args:
      path: a list of graph vertex indices.
      states: a list of states (x, y, ...) along the smoothed trajectory.

    Returns:
      path_to_obs: dict mapping a path vertex index to the list of observation
        indices closest (in Euclidean distance) to that vertex.
      obs_to_state: list mapping each observation index to its closest path
        vertex index.
    """
    # Generate a numpy array of locations corresponding to the path vertices.
    # A list comprehension (not the Python 2 list-returning map) keeps this
    # working on Python 3.
    path_coordinates = [self._env.vertex_to_pose(v) for v in path]
    path_coordinates = np.concatenate(
        [np.reshape(p, [1, 2]) for p in path_coordinates])
    # The observations are taken along a smoothed trajectory following the
    # path. We compute a mapping between the observations and the map
    # vertices.
    path_to_obs = collections.defaultdict(list)
    obs_to_state = []
    for i, s in enumerate(states):
      location = np.reshape(s[0:2], [1, 2])
      # Index of the path vertex closest to this state.
      index = np.argmin(
          np.reshape(
              np.sum(np.power(path_coordinates - location, 2), axis=1), [-1]))
      index = path[index]
      path_to_obs[index].append(i)
      obs_to_state.append(index)
    return path_to_obs, obs_to_state

  def _perturb_state(self, state, noise_var):
    """Perturbs the state.

    The locations are perturbed using truncated Gaussian noise (NOTE: the
    noise_var argument is passed to RandomState.normal as the standard
    deviation, not the variance). The orientation is uniformly sampled.

    Args:
      state: a numpy array containing an env state (x, y locations).
      noise_var: float.

    Returns:
      The perturbed state.
    """

    def normal(v, std):
      # Truncated normal perturbation, limited to two standard deviations.
      if std > 0:
        n = self._rng.normal(0.0, std)
        n = min(n, 2.0 * std)
        n = max(n, -2.0 * std)
        return v + n
      else:
        return v

    state = state.copy()
    state[0] = normal(state[0], noise_var)
    state[1] = normal(state[1], noise_var)
    if state.size > 2:
      state[2] = self._rng.uniform(-math.pi, math.pi)
    return state

  def _sample_obs(self,
                  indices,
                  observations,
                  observation_states,
                  path_to_obs,
                  max_obs_index=None,
                  use_exploration_obs=True):
    """Samples one observation which corresponds to vertex_index in path.

    In addition, the sampled observation must have index in observations less
    than max_obs_index. If these two conditions cannot be satisfied the
    function returns None.

    Args:
      indices: a list of integers.
      observations: a list of numpy arrays containing all the observations.
      observation_states: a list of numpy arrays, each array representing the
        state of the observation.
      path_to_obs: a dict of path indices to lists of observation indices.
      max_obs_index: an integer.
      use_exploration_obs: if True, then the observation is sampled among the
        specified observations, otherwise it is obtained from the environment.

    Returns:
      A tuple of:
      -- A numpy array of size width x height x 3 representing the sampled
         observation.
      -- The index of the sampled observation among the input observations.
      -- The state at which the observation is captured.
    Raises:
      ValueError: if the observation and observation_states lists are of
        different lengths.
    """
    if len(observations) != len(observation_states):
      raise ValueError('observation and observation_states lists must have '
                       'equal lengths')
    if not indices:
      return None, None, None
    vertex_index = self._rng.choice(indices)
    if use_exploration_obs:
      obs_indices = path_to_obs[vertex_index]

      if max_obs_index is not None:
        obs_indices = [i for i in obs_indices if i < max_obs_index]

      if obs_indices:
        index = self._rng.choice(obs_indices)
        if self._add_query_noise:
          xytheta = self._perturb_state(observation_states[index],
                                        self._query_noise_var)
          return self._env.observation(xytheta), index, xytheta
        else:
          return observations[index], index, observation_states[index]
      else:
        return None, None, None
    else:
      xy = self._env.vertex_to_pose(vertex_index)
      xytheta = np.array([xy[0], xy[1], 0.0])
      xytheta = self._perturb_state(xytheta, self._query_noise_var)
      return self._env.observation(xytheta), None, xytheta
class AreNearbyTask(RandomExplorationBasedTask):
  """A task of identifying whether a query is nearby current location or not.

  The query is guaranteed to be in proximity of an already visited location,
  i.e. close to one of the observations. For each observation we have one
  query, which is either close or not to this observation.
  """

  def __init__(
      self,
      max_distance=0,
      *args,
      **kwargs):  # pylint: disable=keyword-arg-before-vararg
    super(AreNearbyTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance

    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a dict with one entry. This entry is a numpy array of
        size in_seq_len x observation_size x observation_size x 3 containing
        in_seq_len images.
      query: a numpy array of size
        in_seq_len x observation_size X observation_size x 3 containing query
        images.
      A tuple of size two. First element is a in_seq_len x 3 numpy array of
        one hot vectors. The i-th row denotes whether the i-th query image is
        nearby visited (label 1), not nearby visited (label 0), or not
        visited (label 2) w.r.t. the i-th observation.
      The second element in the tuple is a mask, a numpy array of size
        in_seq_len x 1 and values 1.0 or 0.0 denoting whether the query is
        valid or not (it can happen that the query is not valid, e.g. there
        are not enough observations to have meaningful queries).
    """
    observations, states, path = self._exploration()
    all_obs = list(observations.values())[0]
    assert len(all_obs) == len(states)

    # The observations are taken along a smoothed trajectory following the
    # path. We compute a mapping between the observations and the map
    # vertices.
    path_to_obs, obs_to_path = self._obs_to_state(path, states)

    # Go over all observations, and sample a query for each. The label of the
    # query is determined by which vertex group it is drawn from.
    g = self._env.graph
    queries = []
    labels = []
    validity_masks = []
    query_index_in_observations = []
    for i, curr_o in enumerate(all_obs):
      p = obs_to_path[i]
      low = max(0, i - self._max_distance)

      # Nearby visited indices, label 1. (The original code filtered the
      # still-empty placeholder index_groups[1] here, which made this group
      # always empty -- fixed to filter the list just built.)
      nearby_visited = [
          ii for ii in path[low:i + 1] + list(g[p]) if ii in obs_to_path[:i]
      ]
      nearby_visited = [ii for ii in nearby_visited if ii in path_to_obs]
      # NOT nearby visited indices, label 0. (Same placeholder bug fixed.)
      not_nearby_visited = [ii for ii in path[:low] if ii not in g[p]]
      not_nearby_visited = [
          ii for ii in not_nearby_visited if ii in path_to_obs
      ]
      # NOT visited indices, label 2.
      not_visited = [
          ii for ii in range(g.number_of_nodes()) if ii not in path[:i + 1]
      ]
      # A list of lists of vertex indices. Each list in this group corresponds
      # to one possible label.
      index_groups = [not_nearby_visited, nearby_visited, not_visited]

      # Consider only labels for which there are indices.
      allowed_labels = [ii for ii, group in enumerate(index_groups) if group]
      label = self._rng.choice(allowed_labels)

      indices = list(set(index_groups[label]))
      max_obs_index = None if label == 2 else i
      use_exploration_obs = label != 2
      o, obs_index, _ = self._sample_obs(
          indices=indices,
          observations=all_obs,
          observation_states=states,
          path_to_obs=path_to_obs,
          max_obs_index=max_obs_index,
          use_exploration_obs=use_exploration_obs)
      query_index_in_observations.append(obs_index)

      # If we cannot sample a valid query, we mark it as not valid in mask.
      if o is None:
        label = 0.0
        o = curr_o
        validity_masks.append(0)
      else:
        validity_masks.append(1)

      # NOTE(review): self._env.observation(...) appears to return a dict of
      # modalities (see GotoStaticXTask.episode), while exploration
      # observations are raw arrays; the original unconditional
      # `o.values()[0]` crashed on the array case. Normalize both here.
      if isinstance(o, dict):
        o = list(o.values())[0]
      queries.append(o)
      labels.append(label)

    query = np.concatenate([np.expand_dims(q, axis=0) for q in queries], axis=0)

    def one_hot(label, num_labels=3):
      # `float` instead of the removed NumPy alias np.float (same dtype).
      a = np.zeros((num_labels,), dtype=float)
      a[int(label)] = 1.0
      return a

    outputs = np.stack([one_hot(l) for l in labels], axis=0)
    validity_mask = np.reshape(
        np.array(validity_masks, dtype=np.int32), [-1, 1])

    self.info['query_index_in_observations'] = query_index_in_observations
    self.info['observation_states'] = states

    return observations, query, (outputs, validity_mask)

  def target_loss(self, truth, predicted, weights=None):
    pass
class NeighboringQueriesTask(RandomExplorationBasedTask):
  """A task of identifying whether two queries are closeby or not.

  The proximity between queries is defined by the length of the shortest path
  between them.
  """

  def __init__(
      self,
      max_distance=1,
      *args,
      **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Initializes a NeighboringQueriesTask.

    Args:
      max_distance: integer, the maximum distance in terms of number of
        vertices between the two queries, so that they are considered
        neighboring.
      *args: for super class.
      **kwargs: for super class.
    """
    super(NeighboringQueriesTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a dict with one entry. This entry is a numpy array of
        size in_seq_len x observation_size x observation_size x 3 containing
        in_seq_len images.
      query: a numpy array of size
        2 x observation_size X observation_size x 3 containing a pair of query
        images.
      A tuple of size two. First element is a numpy array of size 2 containing
        a one hot vector of whether the two observations are neighboring.
        Second element is a boolean numpy value denoting whether this is a
        valid episode.
    """
    observations, states, path = self._exploration()
    # list(...) around .values() keeps this working on Python 3.
    all_obs = list(observations.values())[0]
    assert len(all_obs) == len(states)
    path_to_obs, _ = self._obs_to_state(path, states)
    # Restrict path to ones for which observations have been generated.
    path = [p for p in path if p in path_to_obs]
    # Sample first query.
    query1_index = self._rng.choice(path)
    # Sample label.
    label = self._rng.randint(2)
    # Sample second query.
    # If label == 1, then second query must be nearby, otherwise not.
    closest_indices = list(nx.single_source_shortest_path(
        self._env.graph, query1_index, self._max_distance).keys())
    if label == 0:
      # Path vertices which are NOT within max_distance of the first query.
      indices = [p for p in path if p not in closest_indices]
    else:
      # Path vertices which ARE within max_distance of the first query.
      indices = [p for p in closest_indices if p in path]
    # NOTE(review): if `indices` is empty (e.g. the whole path is within
    # max_distance and label == 0), self._rng.choice raises -- the original
    # had the same behavior; is_valid is never set to 0. Confirm intended.
    query2_index = self._rng.choice(indices)
    # Generate the observations for both queries.
    query1, query1_index, _ = self._sample_obs(
        [query1_index],
        all_obs,
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)
    query2, query2_index, _ = self._sample_obs(
        [query2_index],
        all_obs,
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)

    queries = np.concatenate(
        [np.expand_dims(q, axis=0) for q in [query1, query2]])
    labels = np.array([0, 0])
    labels[label] = 1
    is_valid = np.array([1])

    self.info['observation_states'] = states
    self.info['query_indices_in_observations'] = [query1_index, query2_index]

    return observations, queries, (labels, is_valid)

  def target_loss(self, truth, predicted, weights=None):
    pass
#@gin.configurable
class GotoStaticXTask(RandomExplorationBasedTask):
  """Task go to a static X.

  If continuous reward is used only one goal is allowed so that the reward can
  be computed as a delta-distance to that goal.
  """

  def __init__(self,
               step_reward=0.0,
               goal_reward=1.0,
               hit_wall_reward=-1.0,
               done_at_target=False,
               use_continuous_reward=False,
               *args,
               **kwargs):  # pylint: disable=keyword-arg-before-vararg
    super(GotoStaticXTask, self).__init__(*args, **kwargs)
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

    self._step_reward = step_reward
    self._goal_reward = goal_reward
    self._hit_wall_reward = hit_wall_reward
    self._done_at_target = done_at_target
    self._use_continuous_reward = use_continuous_reward

    # Previous shortest-path length to the goal; set in reset() and updated
    # in reward() when continuous reward is used.
    self._previous_path_length = None

  def episode(self):
    """Generates one episode: exploration inputs, goal query, action deltas."""
    observations, _, path = self._exploration()
    if len(path) < 2:
      raise ValueError('The exploration path has only one node.')

    g = self._env.graph
    start = path[-1]
    # Sample a goal among the earlier path vertices distinct from the start.
    # Equivalent in distribution to the original rejection loop, but cannot
    # hang when every candidate equals the start.
    candidates = [p for p in path[:-1] if p != start]
    if not candidates:
      raise ValueError('The exploration path contains no valid goal vertex.')
    goal = self._rng.choice(candidates)

    goal_path = nx.shortest_path(g, start, goal)

    init_orientation = self._rng.uniform(0, np.pi, (1,))
    trajectory = np.array(
        [list(self._env.vertex_to_pose(p)) for p in goal_path])
    init_xy = np.reshape(trajectory[0, :], [-1])
    init_state = np.concatenate([init_xy, init_orientation], 0)

    trajectory = trajectory[1:, :]
    deltas = envs_util.trajectory_to_deltas(trajectory, init_state)
    output_seq_len = self._config.output.shape[0]
    arr = _pad_or_clip_array(deltas, output_seq_len, output_mask=True)
    # pylint: disable=unbalanced-tuple-unpacking
    thetas, _, thetas_mask = arr

    # list(...) around .values() keeps this working on Python 3.
    query = list(
        self._env.observation(self._env.vertex_to_pose(goal)).values())[0]
    return observations, query, (thetas, thetas_mask)

  def reward(self, obs, done, info):
    """Computes the task reward for a single environment step."""
    if 'wall_collision' in info and info['wall_collision']:
      return obs, self._hit_wall_reward, done, info

    reward = 0.0
    current_vertex = self._env.pose_to_vertex(self._env.state)

    if current_vertex in self._env.targets():
      if self._done_at_target:
        done = True
      else:
        obs = self._env.reset()
      reward = self._goal_reward
    else:
      if self._use_continuous_reward:
        if len(self._env.targets()) != 1:
          raise ValueError(
              'FindX task with continuous reward is assuming only one target.')
        goal_vertex = self._env.targets()[0]
        # Reward is the decrease in shortest-path distance to the goal.
        path_length = self._compute_path_length(goal_vertex)
        reward = self._previous_path_length - path_length
        self._previous_path_length = path_length
      else:
        reward = self._step_reward

    return obs, reward, done, info

  def _compute_path_length(self, goal_vertex):
    """Returns the metric length of the shortest path to goal_vertex."""
    current_vertex = self._env.pose_to_vertex(self._env.state)
    path = nx.shortest_path(self._env.graph, current_vertex, goal_vertex)
    assert len(path) >= 2
    # The first hop is measured exactly from the agent's current location;
    # remaining hops are counted in grid cells.
    curr_xy = np.array(self._env.state[:2])
    next_xy = np.array(self._env.vertex_to_pose(path[1]))
    last_step_distance = np.linalg.norm(next_xy - curr_xy)
    return (len(path) - 2) * self._env.cell_size_px + last_step_distance

  def reset(self, observation):
    if self._use_continuous_reward:
      if len(self._env.targets()) != 1:
        raise ValueError(
            'FindX task with continuous reward is assuming only one target.')
      goal_vertex = self._env.targets()[0]
      self._previous_path_length = self._compute_path_length(goal_vertex)

  def target_loss(self, truth, predicted, weights=None):
    """Action classification loss.

    Args:
      truth: a batch_size x sequence length x number of labels float
        Tensor containing a one hot vector for each label in each batch and
        time.
      predicted: a batch_size x sequence length x number of labels float
        Tensor containing a predicted distribution over all actions.
      weights: a batch_size x sequence_length float Tensor of bool
        denoting which actions are valid.

    Returns:
      An average cross entropy over all batches and elements in sequence.
    """
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class RelativeLocationTask(RandomExplorationBasedTask):
  """A task of estimating the relative location of a query w.r.t current.

  It is to be used for debugging. It is designed such that the output is a
  single value, out of a discrete set of values, so that it can be phrased as
  a classification problem.
  """

  def __init__(self, num_labels, *args, **kwargs):
    """Initializes a relative location task.

    Args:
      num_labels: integer, number of orientations to bin the relative
        orientation into.
      *args: see super class.
      **kwargs: see super class.
    """
    super(RelativeLocationTask, self).__init__(*args, **kwargs)
    self._num_labels = num_labels
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Samples a past query and bins its orientation relative to the agent."""
    observations, states, path = self._exploration()

    # Select a random element from history.
    path_to_obs, _ = self._obs_to_state(path, states)
    use_exploration_obs = not self._add_query_noise
    # list(...) around .values() keeps this working on Python 3.
    query, _, query_state = self._sample_obs(
        path[:-1],
        list(observations.values())[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=use_exploration_obs)

    x, y, theta = tuple(states[-1])
    q_x, q_y, _ = tuple(query_state)
    t_x, t_y = q_x - x, q_y - y
    # Rotate the translation vector into the agent-centric frame.
    (rt_x, rt_y) = (np.sin(theta) * t_x - np.cos(theta) * t_y,
                    np.cos(theta) * t_x + np.sin(theta) * t_y)
    # Bins are [a(i), a(i+1)] for a(i) = -pi + 0.5 * bin_size + i * bin_size.
    shift = np.pi * (1 - 1.0 / (2.0 * self._num_labels))
    orientation = np.arctan2(rt_y, rt_x) + shift
    if orientation < 0:
      orientation += 2 * np.pi
    label = int(np.floor(self._num_labels * orientation / (2 * np.pi)))

    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    if out_shape[0] != self._num_labels:
      raise ValueError('Output shape must be of size %d' % self._num_labels)
    output = np.zeros(out_shape, dtype=np.float32)
    output[label] = 1

    return observations, query, (output, None)

  def target_loss(self, truth, predicted, weights=None):
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class LocationClassificationTask(UnrolledTask):
  """A task of classifying a location as one of several classes.

  The task does not have an input, but just a query and an output. The query
  is an observation of the current location, e.g. an image taken from the
  current state. The output is a label classifying this location in one of
  predefined set of locations (or landmarks).
  The current implementation classifies locations as intersections based on the
  number and directions of biforcations. It is expected that a location can have
  at most 4 different directions, aligned with the axes. As each of these four
  directions might be present or not, the number of possible intersections are
  2^4 = 16.
  """

  def __init__(self, env, seed, *args, **kwargs):
    """Constructor.

    Args:
      env: environment object; must expose graph, vertex_to_pose,
        pose_to_vertex and observation (used below in episode()).
      seed: integer seed for the task-local numpy RandomState.
      *args: forwarded to UnrolledTask.__init__.
      **kwargs: forwarded to UnrolledTask.__init__.

    Raises:
      NotImplementedError: if more than one input modality is configured.
    """
    super(LocationClassificationTask, self).__init__(*args, **kwargs)
    self._env = env
    self._rng = np.random.RandomState(seed)
    # A location property which can be set. If not set, a random one is
    # generated.
    self._location = None
    # NOTE(review): this uses self.config while every other method uses
    # self._config -- presumably UnrolledTask exposes both; confirm.
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  @property
  def location(self):
    # Externally-set location override; None means episode() samples one.
    return self._location

  @location.setter
  def location(self, location):
    self._location = location

  def episode(self):
    """Builds one episode classifying the (sampled or preset) location.

    Returns:
      ([], query, (one_hot_label, None)) where query is the observation of
      the location for the single configured modality and one_hot_label is a
      16-way one-hot encoding of the intersection type at that location.

    Raises:
      ValueError: if the configured output shape is not rank 1 of size 16.
    """
    # Get a location. If not set, sample on at a vertex with a random
    # orientation
    location = self._location
    if location is None:
      num_nodes = self._env.graph.number_of_nodes()
      vertex = int(math.floor(self._rng.uniform(0, num_nodes)))
      xy = self._env.vertex_to_pose(vertex)
      theta = self._rng.uniform(0, 2 * math.pi)
      location = np.concatenate(
          [np.reshape(xy, [-1]), np.array([theta])], axis=0)
    else:
      vertex = self._env.pose_to_vertex(location)
      theta = location[2]

    neighbors = self._env.graph.neighbors(vertex)
    xy_s = [self._env.vertex_to_pose(n) for n in neighbors]

    def rotate(xy, theta):
      """Rotates a vector around the origin by angle theta.

      Args:
        xy: a numpy darray of shape (2, ) of floats containing the x and y
          coordinates of a vector.
        theta: a python float containing the rotation angle in radians.

      Returns:
        A numpy darray of floats of shape (2,) containing the x and y
        coordinates rotated xy.
      """
      rotated_x = np.cos(theta) * xy[0] - np.sin(theta) * xy[1]
      rotated_y = np.sin(theta) * xy[0] + np.cos(theta) * xy[1]
      return np.array([rotated_x, rotated_y])

    # Rotate all intersection biforcation by the orientation of the agent as the
    # intersection label is defined in an agent centered fashion.
    # The extra -pi/4 offset rotates bin boundaries so that each axis-aligned
    # direction lands inside its own quadrant bin below.
    xy_s = [
        rotate(xy - location[0:2], -location[2] - math.pi / 4) for xy in xy_s
    ]
    th_s = [np.arctan2(xy[1], xy[0]) for xy in xy_s]

    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    num_labels = out_shape[0]
    if num_labels != 16:
      raise ValueError('Currently only 16 labels are supported '
                       '(there are 16 different 4 way intersection types).')
    # Quantize each neighbor direction (in [-pi, pi]) into a quadrant bin.
    # NOTE(review): th == pi maps to bin 4 which would make label 2^4 = 16 and
    # index out of range below -- presumably unreachable; confirm.
    th_s = set([int(math.floor(4 * (th / (2 * np.pi) + 0.5))) for th in th_s])
    one_hot_label = np.zeros((num_labels,), dtype=np.float32)
    # The label is a 4-bit mask: bit i is set iff a bifurcation exists in
    # quadrant i.
    label = 0
    for th in th_s:
      label += pow(2, th)
    one_hot_label[int(label)] = 1.0
    # NOTE(review): dict.values()[0] relies on Python 2 returning a list.
    query = self._env.observation(location).values()[0]
    return [], query, (one_hot_label, None)

  def reward(self, obs, done, info):
    # This task is supervised-only; the RL reward interface must not be used.
    raise ValueError('Do not call.')

  def target_loss(self, truth, predicted, weights=None):
    """One-hot classification loss for the 16-way intersection label."""
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class GotoStaticXNoExplorationTask(UnrolledTask):
  """An interface for findX tasks without exploration.

  The agent is initialized a random location in a random world and a random goal
  and the objective is for the agent to move toward the goal. This class
  generates episode for such task. Each generates a sequence of observations x
  and target outputs y. x is the observations and is an OrderedDict with keys
  provided from config.inputs.keys() and the shapes provided in the
  config.inputs. The output is a numpy arrays with the shape specified in the
  config.output. The shape of the array is (sequence_length x action_size) where
  action is the number of actions that can be done in the environment. Note that
  config.output.shape should be set according to the number of actions that can
  be done in the env.
  target outputs y are the groundtruth value of each action that is computed
  from the environment graph. The target output for each action is proportional
  to the progress that each action makes. Target value of 1 means that the
  action takes the agent one step closer, -1 means the action takes the agent
  one step farther. Value of -2 means that action should not take place at all.
  This can be because the action leads to collision or it wants to terminate the
  episode prematurely.
  """

  def __init__(self, env, *args, **kwargs):
    """Constructor.

    Args:
      env: environment object; must expose graph, targets,
        random_step_sequence, pose_to_vertex, vertex_to_pose and action.
      *args: forwarded to UnrolledTask.__init__.
      **kwargs: forwarded to UnrolledTask.__init__ (must include config).

    Raises:
      ValueError: if config.query is set, if config.output is not rank 2
        (sequence_length x number_of_actions), or if any input's sequence
        length differs from the output's.
    """
    super(GotoStaticXNoExplorationTask, self).__init__(*args, **kwargs)
    if self._config.query is not None:
      raise ValueError('query should be None.')
    # Fix: this check used to appear twice; the redundant copy was removed.
    if len(self._config.output.shape) != 2:
      raise ValueError('output should only have two dimensions:'
                       '(sequence_length x number_of_actions)')
    for input_config in self._config.inputs.values():
      if input_config.shape[0] != self._config.output.shape[0]:
        raise ValueError('the first dimension of the input and output should'
                         'be the same.')
    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertexes.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertexes

    Returns:
      integer, minimum distance from the vertex to any of the target_vertices.

    Raises:
      ValueError: if there is no path between the vertex and at least one of
        the target_vertices.
    """
    try:
      # nx.shortest_path returns the node list, so this counts nodes
      # (edges + 1); only relative differences are used downstream.
      return np.min([
          len(nx.shortest_path(self._env.graph, vertex, t))
          for t in target_vertices
      ])
    except:
      # Fix: logging restored for consistency with the sibling NewTask class;
      # the exception is still re-raised to the caller.
      logging.error('there is no path between vertex %d and at least one of '
                    'the targets %r', vertex, target_vertices)
      raise

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the length
    of the shortest path to the goal. If an action takes the agent one step
    closer to the goal the value is 1. In case, it takes the agent one step away
    from the goal it would be -1. If it leads to collision or if the agent uses
    action stop before reaching to the goal it is -2. To avoid scale issues the
    gt_values are multipled by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) and each element is the groundtruth
      value of each action based on the progress each action makes.
    """
    action_size = self._config.output.shape[1]
    # Default of -2 marks invalid actions (collision / premature stop).
    output_value = np.ones((action_size,), dtype=np.float32) * -2
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      if adj_distance is None:
        continue
      action_index = self._env.action(
          self._env.vertex_to_pose(vertex), self._env.vertex_to_pose(adj))
      assert action_index is not None, ('{} is not adjacent to {}. There might '
                                        'be a problem in environment graph '
                                        'connectivity because there is no '
                                        'direct edge between the given '
                                        'vertices').format(
                                            self._env.vertex_to_pose(vertex),
                                            self._env.vertex_to_pose(adj))
      output_value[action_index] = my_distance - adj_distance
    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types to
      numpy arrays. The second element is query but we assume that the goal
      is also given as part of observation so it should be None for this task,
      and the outputs is the tuple of ground truth action values with the
      shape of (sequence_length x action_size) that is coming from
      config.output.shape and a numpy array with the shape of
      (sequence_length,) that is 1 if the corresponding element of the
      input and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence is not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but the
        environment does not return that.
    """
    # Sequence length is the first dimension of any of the input tensors.
    # NOTE(review): .values()[0] relies on Python 2 dict views being lists.
    sequence_length = self._config.inputs.values()[0].shape[0]
    modality_types = self._config.inputs.keys()

    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [self._env.pose_to_vertex(x) for x in self._env.targets()]
    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))

    # Building up observations. observations will be a OrderedDict of
    # modality types. The values are numpy arrays that follow the given shape
    # in the input config for each modality type.
    observations = collections.OrderedDict([k, []] for k in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      # Only going over the modality types that are specified in the input
      # config.
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the environment.'
                           '{} not in {}'.format(modality_type,
                                                 obs_dict.keys()))
        obs = obs_dict[modality_type]
        if np.any(
            obs.shape != tuple(self._config.inputs[modality_type].shape[1:])):
          raise ValueError(
              'The observations should have the same size as speicifed in'
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape,
                  self._config.inputs[modality_type].shape[1:]))
        observations[modality_type].append(obs)

    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]

    # Pad (or clip) both values and observations to sequence_length; the mask
    # marks the real (non-padded) steps.
    # pylint: disable=unbalanced-tuple-unpacking
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    for modality_type, obs in observations.iteritems():
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False, output_mask=True)
      assert np.all(mask == value_mask)

    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()
    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if np.any(targets_shape != true_targets_shape):
      raise ValueError('the shape of targets and true_targets are not the same'
                       '{} != {}'.format(targets_shape, true_targets_shape))

    if weights is not None:
      # Filtering targets and true_targets using weights.
      weights_shape = weights.get_shape().as_list()
      if np.any(weights_shape != targets_shape[0:2]):
        raise ValueError('The first two elements of weights shape should match'
                         'target. {} != {}'.format(weights_shape,
                                                   targets_shape))
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)

    return tf.losses.mean_squared_error(tf.reshape(targets, [-1]),
                                        tf.reshape(true_targets, [-1]))

  def reward(self, obs, done, info):
    raise NotImplementedError('reward is not implemented for this task')
################################################################################
class NewTask(UnrolledTask):
  """Task producing supervised action-value targets along a random walk.

  NOTE(review): this class is a near line-for-line duplicate of
  GotoStaticXNoExplorationTask (the visible differences are active error
  logging and no config validation in the constructor) -- consider
  deduplicating.
  """

  def __init__(self, env, *args, **kwargs):
    super(NewTask, self).__init__(*args, **kwargs)
    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertexes.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertexes

    Returns:
      integer, minimum distance from the vertex to any of the target_vertices.

    Raises:
      ValueError: if there is no path between the vertex and at least one of
        the target_vertices.
    """
    try:
      # nx.shortest_path returns the node list, so this counts nodes
      # (edges + 1); only relative differences are used downstream.
      return np.min([
          len(nx.shortest_path(self._env.graph, vertex, t))
          for t in target_vertices
      ])
    except:
      # Log-and-reraise: the bare except is deliberate so any failure is
      # recorded before propagating.
      logging.error('there is no path between vertex %d and at least one of '
                    'the targets %r', vertex, target_vertices)
      raise

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the length
    of the shortest path to the goal. If an action takes the agent one step
    closer to the goal the value is 1. In case, it takes the agent one step away
    from the goal it would be -1. If it leads to collision or if the agent uses
    action stop before reaching to the goal it is -2. To avoid scale issues the
    gt_values are multipled by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) and each element is the groundtruth
      value of each action based on the progress each action makes.
    """
    action_size = self._config.output.shape[1]
    # Default of -2 marks invalid actions (collision / premature stop).
    output_value = np.ones((action_size), dtype=np.float32) * -2
    # own compute _compute_shortest_path_length - returnts float
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      if adj_distance is None:
        continue
      action_index = self._env.action(
          self._env.vertex_to_pose(vertex), self._env.vertex_to_pose(adj))
      assert action_index is not None, ('{} is not adjacent to {}. There might '
                                        'be a problem in environment graph '
                                        'connectivity because there is no '
                                        'direct edge between the given '
                                        'vertices').format(
                                            self._env.vertex_to_pose(vertex),
                                            self._env.vertex_to_pose(adj))
      output_value[action_index] = my_distance - adj_distance
    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types to
      numpy arrays. The second element is query but we assume that the goal
      is also given as part of observation so it should be None for this task,
      and the outputs is the tuple of ground truth action values with the
      shape of (sequence_length x action_size) that is coming from
      config.output.shape and a numpy array with the shape of
      (sequence_length,) that is 1 if the corresponding element of the
      input and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence is not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but the
        environment does not return that.
    """
    # Sequence length is the first dimension of any of the input tensors.
    # NOTE(review): .values()[0] relies on Python 2 dict views being lists.
    sequence_length = self._config.inputs.values()[0].shape[0]
    modality_types = self._config.inputs.keys()

    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [self._env.pose_to_vertex(x) for x in self._env.targets()]
    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))

    # Building up observations. observations will be a OrderedDict of
    # modality types. The values are numpy arrays that follow the given shape
    # in the input config for each modality type.
    observations = collections.OrderedDict([k, []] for k in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      # Only going over the modality types that are specified in the input
      # config.
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the environment.'
                           '{} not in {}'.format(modality_type,
                                                 obs_dict.keys()))
        obs = obs_dict[modality_type]
        if np.any(
            obs.shape != tuple(self._config.inputs[modality_type].shape[1:])):
          raise ValueError(
              'The observations should have the same size as speicifed in'
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape,
                  self._config.inputs[modality_type].shape[1:]))
        observations[modality_type].append(obs)

    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]

    # Pad (or clip) values and observations to sequence_length; the mask
    # marks the real (non-padded) steps.
    # pylint: disable=unbalanced-tuple-unpacking
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    for modality_type, obs in observations.iteritems():
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False, output_mask=True)
      assert np.all(mask == value_mask)

    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()
    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if np.any(targets_shape != true_targets_shape):
      raise ValueError('the shape of targets and true_targets are not the same'
                       '{} != {}'.format(targets_shape, true_targets_shape))

    if weights is not None:
      # Filtering targets and true_targets using weights.
      weights_shape = weights.get_shape().as_list()
      if np.any(weights_shape != targets_shape[0:2]):
        raise ValueError('The first two elements of weights shape should match'
                         'target. {} != {}'.format(weights_shape,
                                                   targets_shape))
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)

    return tf.losses.mean_squared_error(tf.reshape(targets, [-1]),
                                        tf.reshape(true_targets, [-1]))

  def reward(self, obs, done, info):
    raise NotImplementedError('reward is not implemented for this task')
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=line-too-long
# pyformat: disable
"""Train and eval for supervised navigation training.
For training:
python train_supervised_active_vision.py \
--mode='train' \
--logdir=$logdir/checkin_log_det/ \
--modality_types='det' \
--batch_size=8 \
--train_iters=200000 \
--lstm_cell_size=2048 \
--policy_fc_size=2048 \
--sequence_length=20 \
--max_eval_episode_length=100 \
--test_iters=194 \
--gin_config=envs/configs/active_vision_config.gin \
--gin_params='ActiveVisionDatasetEnv.dataset_root="$datadir"' \
--logtostderr
For testing:
python train_supervised_active_vision.py \
--mode='eval' \
--logdir=$logdir/checkin_log_det/ \
--modality_types='det' \
--batch_size=8 \
--train_iters=200000 \
--lstm_cell_size=2048 \
--policy_fc_size=2048 \
--sequence_length=20 \
--max_eval_episode_length=100 \
--test_iters=194 \
--gin_config=envs/configs/active_vision_config.gin \
--gin_params='ActiveVisionDatasetEnv.dataset_root="$datadir"' \
--logtostderr
"""
import collections
import os
import time
from absl import app
from absl import flags
from absl import logging
import networkx as nx
import numpy as np
import tensorflow as tf
import gin
import embedders
import policies
import tasks
from envs import active_vision_dataset_env
from envs import task_env
slim = tf.contrib.slim

flags.DEFINE_string('logdir', '',
                    'Path to a directory to write summaries and checkpoints')
# Parameters controlling the training setup. In general one would not need to
# modify them.
flags.DEFINE_string('master', 'local',
                    'BNS name of the TensorFlow master, or local.')
flags.DEFINE_integer('task_id', 0,
                     'Task id of the replica running the training.')
flags.DEFINE_integer('ps_tasks', 0,
                     'Number of tasks in the ps job. If 0 no ps job is used.')

# Optimization schedule and logging cadence.
flags.DEFINE_integer('decay_steps', 1000,
                     'Number of steps for exponential decay.')
flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate.')
flags.DEFINE_integer('batch_size', 8, 'Batch size.')
flags.DEFINE_integer('sequence_length', 20, 'sequence length')
flags.DEFINE_integer('train_iters', 200000, 'number of training iterations.')
flags.DEFINE_integer('save_summaries_secs', 300,
                     'number of seconds between saving summaries')
flags.DEFINE_integer('save_interval_secs', 300,
                     'numer of seconds between saving variables')
flags.DEFINE_integer('log_every_n_steps', 20, 'number of steps between logging')

# Model architecture: input modalities, embedder conv stack, LSTM and policy
# sizes.
flags.DEFINE_string('modality_types', '',
                    'modality names in _ separated format')
flags.DEFINE_string('conv_window_sizes', '8_4_3',
                    'conv window size in separated by _')
flags.DEFINE_string('conv_strides', '4_2_1', '')
flags.DEFINE_string('conv_channels', '8_16_16', '')
flags.DEFINE_integer('embedding_fc_size', 128,
                     'size of embedding for each modality')
flags.DEFINE_integer('obs_resolution', 64,
                     'resolution of the input observations')
flags.DEFINE_integer('lstm_cell_size', 2048, 'size of lstm cell size')
flags.DEFINE_integer('policy_fc_size', 2048,
                     'size of fully connected layers for policy part')
flags.DEFINE_float('weight_decay', 0.0002, 'weight decay')
flags.DEFINE_integer('goal_category_count', 5, 'number of goal categories')
flags.DEFINE_integer('action_size', 7, 'number of possible actions')
flags.DEFINE_integer('max_eval_episode_length', 100,
                     'maximum sequence length for evaluation.')

# Mode selection, evaluation and checkpoint paths.
flags.DEFINE_enum('mode', 'train', ['train', 'eval'],
                  'indicates whether it is in training or evaluation')
flags.DEFINE_integer('test_iters', 194,
                     'number of iterations that the eval needs to be run')
flags.DEFINE_multi_string('gin_config', [],
                          'List of paths to a gin config files for the env.')
flags.DEFINE_multi_string('gin_params', [],
                          'Newline separated list of Gin parameter bindings.')
flags.DEFINE_string(
    'resnet50_path', './resnet_v2_50_checkpoint/resnet_v2_50.ckpt', 'path to resnet50'
    'checkpoint')
flags.DEFINE_bool('freeze_resnet_weights', True, '')
flags.DEFINE_string(
    'eval_init_points_file_name', '',
    'Name of the file that containts the initial locations and'
    'worlds for each evalution point')

FLAGS = flags.FLAGS

# Active Vision Dataset world names used for training and held-out testing.
TRAIN_WORLDS = [
    'Home_001_1', 'Home_001_2', 'Home_002_1', 'Home_003_1', 'Home_003_2',
    'Home_004_1', 'Home_004_2', 'Home_005_1', 'Home_005_2', 'Home_006_1',
    'Home_010_1'
]

TEST_WORLDS = ['Home_011_1', 'Home_013_1', 'Home_016_1']
def create_modality_types():
  """Parses FLAGS.modality_types into a list of task_env.ModalityTypes.

  The flag is a '_'-separated list of names from
  {'image', 'sseg', 'det', 'depth'}.

  Returns:
    A list of task_env.ModalityTypes values, one per name, in flag order.

  Raises:
    ValueError: if the flag is empty or contains an unknown name.
  """
  if not FLAGS.modality_types:
    raise ValueError('there needs to be at least one modality type')
  name_to_type = {
      'image': task_env.ModalityTypes.IMAGE,
      'sseg': task_env.ModalityTypes.SEMANTIC_SEGMENTATION,
      'depth': task_env.ModalityTypes.DEPTH,
      'det': task_env.ModalityTypes.OBJECT_DETECTION,
  }
  names = FLAGS.modality_types.split('_')
  for name in names:
    if name not in name_to_type:
      raise ValueError('invalid modality type: {}'.format(name))
  return [name_to_type[name] for name in names]
def create_task_io_config(
    modality_types,
    goal_category_count,
    action_size,
    sequence_length,
):
  """Generates task io config.

  Args:
    modality_types: list of task_env.ModalityTypes used as model inputs.
    goal_category_count: number of goal categories (goal one-hot length).
    action_size: number of actions the policy predicts.
    sequence_length: unroll length; first dimension of every input/output.

  Returns:
    A tasks.UnrolledTaskIOConfig with one input per requested modality plus
    GOAL and PREV_ACTION, a (sequence_length x action_size) float32 output,
    and no query.
  """
  shape_prefix = [sequence_length, FLAGS.obs_resolution, FLAGS.obs_resolution]
  shapes = {
      task_env.ModalityTypes.IMAGE: [sequence_length, 224, 224, 3],
      task_env.ModalityTypes.DEPTH: shape_prefix + [
          2,
      ],
      task_env.ModalityTypes.SEMANTIC_SEGMENTATION: shape_prefix + [
          1,
      ],
      task_env.ModalityTypes.OBJECT_DETECTION: shape_prefix + [
          90,
      ]
  }
  types = {k: tf.float32 for k in shapes}
  # Raw images are kept as uint8; everything else is float32.
  types[task_env.ModalityTypes.IMAGE] = tf.uint8
  inputs = collections.OrderedDict(
      [[mtype, (types[mtype], shapes[mtype])] for mtype in modality_types])
  inputs[task_env.ModalityTypes.GOAL] = (tf.float32,
                                         [sequence_length, goal_category_count])
  # Previous action is a one-hot over actions plus one trailing success bit.
  inputs[task_env.ModalityTypes.PREV_ACTION] = (tf.float32, [
      sequence_length, action_size + 1
  ])
  # Fix: removed stray Python 2 debug statement `print inputs`.
  return tasks.UnrolledTaskIOConfig(
      inputs=inputs,
      output=(tf.float32, [sequence_length, action_size]),
      query=None)
def map_to_embedder(modality_type):
  """Maps modality_type to its corresponding embedder.

  PREV_ACTION gets no embedder (None), GOAL passes through unchanged, IMAGE
  uses a ResNet50, and every other modality is handled by a small conv
  network configured from the conv_* flags.
  """
  if modality_type == task_env.ModalityTypes.PREV_ACTION:
    return None
  if modality_type == task_env.ModalityTypes.GOAL:
    return embedders.IdentityEmbedder()
  if modality_type == task_env.ModalityTypes.IMAGE:
    return embedders.ResNet50Embedder()

  def parse_int_list(flag_value):
    # '8_4_3' -> [8, 4, 3]
    return [int(token) for token in flag_value.split('_')]

  small_net_params = tf.contrib.training.HParams(
      to_one_hot=modality_type == task_env.ModalityTypes.SEMANTIC_SEGMENTATION,
      one_hot_length=10,
      conv_sizes=parse_int_list(FLAGS.conv_window_sizes),
      conv_strides=parse_int_list(FLAGS.conv_strides),
      conv_channels=parse_int_list(FLAGS.conv_channels),
      embedding_size=FLAGS.embedding_fc_size,
      weight_decay_rate=FLAGS.weight_decay,
  )
  return embedders.SmallNetworkEmbedder(small_net_params)
def create_train_and_init_ops(policy, task):
  """Creates training ops given the arguments.

  Args:
    policy: the policy for the task.
    task: the task instance.

  Returns:
    train_op: the op that needs to be runned at each step.
    summaries_op: the summary op that is executed.
    init_fn: the op that initializes the variables if there is no previous
      checkpoint. If Resnet50 is not used in the model it is None, otherwise
      it reads the weights from FLAGS.resnet50_path and sets the init_fn
      to the op that initializes the ResNet50 with the pre-trained weights.

  NOTE(review): the docstring mentions summaries_op, but only
  (train_op, init_fn) are actually returned; summaries are registered via
  tf.summary.scalar and presumably collected by the caller -- confirm.
  """
  assert isinstance(task, tasks.GotoStaticXNoExplorationTask)
  assert isinstance(policy, policies.Policy)

  inputs, _, gt_outputs, masks = task.tf_episode_batch(FLAGS.batch_size)
  outputs, _ = policy.build(inputs, None)
  loss = task.target_loss(gt_outputs, outputs, masks)

  init_fn = None

  # If resnet is added to the graph, init_fn should initialize resnet weights
  # if there is no previous checkpoint.
  variables_assign_dict = {}
  vars_list = []
  for v in slim.get_model_variables():
    if v.name.find('resnet') >= 0:
      # Resnet variables are excluded from training when frozen, but are
      # always mapped for checkpoint restoration. The slice drops the scope
      # prefix before 'resnet' and the trailing ':0'.
      if not FLAGS.freeze_resnet_weights:
        vars_list.append(v)
      variables_assign_dict[v.name[v.name.find('resnet'):-2]] = v
    else:
      vars_list.append(v)

  global_step = tf.train.get_or_create_global_step()
  learning_rate = tf.train.exponential_decay(
      FLAGS.learning_rate,
      global_step,
      decay_steps=FLAGS.decay_steps,
      decay_rate=0.98,
      staircase=True)
  optimizer = tf.train.AdamOptimizer(learning_rate)
  train_op = slim.learning.create_train_op(
      loss,
      optimizer,
      global_step=global_step,
      variables_to_train=vars_list,
  )

  if variables_assign_dict:
    init_fn = slim.assign_from_checkpoint_fn(
        FLAGS.resnet50_path,
        variables_assign_dict,
        ignore_missing_vars=False)
  scalar_summaries = {}
  scalar_summaries['LR'] = learning_rate
  scalar_summaries['loss'] = loss

  # NOTE(review): iteritems is Python 2 only.
  for name, summary in scalar_summaries.iteritems():
    tf.summary.scalar(name, summary)

  return train_op, init_fn
def create_eval_ops(policy, config, possible_targets):
  """Creates the necessary ops for evaluation.

  Args:
    policy: the policy whose build() produces the action outputs.
    config: task io config providing input types/shapes.
    possible_targets: list of target category names; one eval summary is
      created per target plus an aggregate 'mean'.

  Returns:
    (inputs_feed, prev_state_feed, policy_outputs, (summary_op, summary_feed))
    where inputs_feed maps each modality to its placeholder (batch of 1),
    prev_state_feed is the pair of LSTM state placeholders, policy_outputs is
    the result of policy.build, and summary_feed maps each target name to the
    scalar placeholder behind its eval summary.
  """
  inputs_feed = collections.OrderedDict([[
      mtype,
      tf.placeholder(config.inputs[mtype].type,
                     [1] + config.inputs[mtype].shape)
  ] for mtype in config.inputs])
  # Previous action: one-hot over actions plus one extra (success) slot.
  inputs_feed[task_env.ModalityTypes.PREV_ACTION] = tf.placeholder(
      tf.float32, [1, 1] + [
          config.output.shape[-1] + 1,
      ])
  # Two placeholders: LSTM cell state and hidden state.
  prev_state_feed = [
      tf.placeholder(
          tf.float32, [1, FLAGS.lstm_cell_size], name='prev_state_{}'.format(i))
      for i in range(2)
  ]
  policy_outputs = policy.build(inputs_feed, prev_state_feed)
  summary_feed = {}
  for c in possible_targets + ['mean']:
    summary_feed[c] = tf.placeholder(
        tf.float32, [], name='eval_in_range_{}_input'.format(c))
    tf.summary.scalar('eval_in_range_{}'.format(c), summary_feed[c])
  return inputs_feed, prev_state_feed, policy_outputs, (tf.summary.merge_all(),
                                                        summary_feed)
def unroll_policy_for_eval(
    sess,
    env,
    inputs_feed,
    prev_state_feed,
    policy_outputs,
    number_of_steps,
    output_folder,
):
  """unrolls the policy for testing.

  Args:
    sess: tf.Session
    env: The environment.
    inputs_feed: dictionary of placeholder for the input modalities.
    prev_state_feed: placeholder for the input to the prev_state of the model.
    policy_outputs: tensor that contains outputs of the policy.
    number_of_steps: maximum number of unrolling steps.
    output_folder: output_folder where the function writes a dictionary of
      detailed information about the path. The dictionary keys are 'states' and
      'distance'. The value for 'states' is the list of states that the agent
      goes along the path. The value for 'distance' contains the length of
      shortest path to the goal at each step.

  Returns:
    states: list of states along the path.
    distance: list of distances along the path.
  """
  # LSTM state starts at zeros and is threaded through each step below.
  prev_state = [
      np.zeros((1, FLAGS.lstm_cell_size), dtype=np.float32) for _ in range(2)
  ]
  # NOTE(review): prev_action is updated after each step but never placed in
  # feed_dict (and never cleared between steps), so it appears to be dead
  # state -- confirm whether it was meant to be fed to the PREV_ACTION input.
  prev_action = np.zeros((1, 1, FLAGS.action_size + 1), dtype=np.float32)
  obs = env.reset()
  distances_to_goal = []
  states = []
  unique_id = '{}_{}'.format(env.cur_image_id(), env.goal_string)
  for _ in range(number_of_steps):
    # Record the current shortest-path distance to the nearest target.
    distances_to_goal.append(
        np.min([
            len(
                nx.shortest_path(env.graph, env.pose_to_vertex(env.state()),
                                 env.pose_to_vertex(target_view)))
            for target_view in env.targets()
        ]))
    states.append(env.state())
    # Each observation is wrapped to (batch=1, time=1, ...).
    feed_dict = {inputs_feed[mtype]: [[obs[mtype]]] for mtype in inputs_feed}
    feed_dict[prev_state_feed[0]] = prev_state[0]
    feed_dict[prev_state_feed[1]] = prev_state[1]
    action_values, prev_state = sess.run(policy_outputs, feed_dict=feed_dict)
    # Greedy action selection.
    chosen_action = np.argmax(action_values[0])
    obs, _, done, info = env.step(np.int32(chosen_action))
    prev_action[0][0][chosen_action] = 1.
    prev_action[0][0][-1] = float(info['success'])
    # If the agent chooses action stop or the number of steps exceeeded
    # env._episode_length.
    if done:
      break

  # logging.info('distance = %d, id = %s, #steps = %d', distances_to_goal[-1],
  output_path = os.path.join(output_folder, unique_id + '.npy')
  # NOTE(review): np.save writes binary; mode 'w' works on Python 2 but would
  # need 'wb' on Python 3.
  with tf.gfile.Open(output_path, 'w') as f:
    print 'saving path information to {}'.format(output_path)
    np.save(f, {'states': states, 'distance': distances_to_goal})
  return states, distances_to_goal
def init(sequence_length, eval_init_points_file_name, worlds):
  """Initializes the common operations between train and test.

  Args:
    sequence_length: unroll length used for the task io config and policy.
    eval_init_points_file_name: file with fixed starting points for eval, or
      None for training.
    worlds: list of world names the environment should load.

  Returns:
    (env, config, task, policy) fully constructed and wired together.
  """
  modality_types = create_modality_types()
  logging.info('modality types: %r', modality_types)
  # negative reward_goal_range prevents the env from terminating early when the
  # agent is close to the goal. The policy should keep the agent until the end
  # of the 100 steps either through chosing stop action or oscilating around
  # the target.
  env = active_vision_dataset_env.ActiveVisionDatasetEnv(
      modality_types=modality_types +
      [task_env.ModalityTypes.GOAL, task_env.ModalityTypes.PREV_ACTION],
      reward_goal_range=-1,
      eval_init_points_file_name=eval_init_points_file_name,
      worlds=worlds,
      output_size=FLAGS.obs_resolution,
  )

  config = create_task_io_config(
      modality_types=modality_types,
      goal_category_count=FLAGS.goal_category_count,
      action_size=FLAGS.action_size,
      sequence_length=sequence_length,
  )
  task = tasks.GotoStaticXNoExplorationTask(env=env, config=config)
  # One embedder per configured input modality (None for PREV_ACTION).
  embedders_dict = {mtype: map_to_embedder(mtype) for mtype in config.inputs}
  policy_params = tf.contrib.training.HParams(
      lstm_state_size=FLAGS.lstm_cell_size,
      fc_channels=FLAGS.policy_fc_size,
      weight_decay=FLAGS.weight_decay,
      target_embedding_size=FLAGS.embedding_fc_size,
  )
  policy = policies.LSTMPolicy(
      modality_names=config.inputs.keys(),
      embedders_dict=embedders_dict,
      action_size=FLAGS.action_size,
      params=policy_params,
      max_episode_length=sequence_length)
  return env, config, task, policy
def test():
  """Contains all the operations for testing policies.

  Polls FLAGS.logdir for new checkpoints; each time a new checkpoint appears
  it is restored, the policy is unrolled for FLAGS.test_iters episodes, and
  per-goal success rates (final distance <= 7 steps) are written as summaries
  under <logdir>/evals/<step>.
  """
  env, config, _, policy = init(1, 'all_init_configs', TEST_WORLDS)
  inputs_feed, prev_state_feed, policy_outputs, summary_op = create_eval_ops(
      policy, config, env.possible_targets)
  sv = tf.train.Supervisor(logdir=FLAGS.logdir)
  prev_checkpoint = None
  with sv.managed_session(
      start_standard_services=False,
      config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    while not sv.should_stop():
      # Busy-wait (1s sleeps) until a checkpoint different from the last
      # evaluated one shows up in the log directory.
      while True:
        new_checkpoint = tf.train.latest_checkpoint(FLAGS.logdir)
        print 'new_checkpoint ', new_checkpoint
        if not new_checkpoint:
          time.sleep(1)
          continue
        if prev_checkpoint is None:
          prev_checkpoint = new_checkpoint
          break
        if prev_checkpoint != new_checkpoint:
          prev_checkpoint = new_checkpoint
          break
        else:  # if prev_checkpoint == new_checkpoint, we have to wait more.
          time.sleep(1)
      # Checkpoint paths end in '-<global_step>'.
      checkpoint_step = int(new_checkpoint[new_checkpoint.rfind('-') + 1:])
      sv.saver.restore(sess, new_checkpoint)
      print '--------------------'
      print 'evaluating checkpoint {}'.format(new_checkpoint)
      folder_path = os.path.join(FLAGS.logdir, 'evals', str(checkpoint_step))
      if not tf.gfile.Exists(folder_path):
        tf.gfile.MakeDirs(folder_path)
      # One success-list per possible goal category.
      eval_stats = {c: [] for c in env.possible_targets}
      for test_iter in range(FLAGS.test_iters):
        print 'evaluating {} of {}'.format(test_iter, FLAGS.test_iters)
        _, distance_to_goal = unroll_policy_for_eval(
            sess,
            env,
            inputs_feed,
            prev_state_feed,
            policy_outputs,
            FLAGS.max_eval_episode_length,
            folder_path,
        )
        print 'goal = {}'.format(env.goal_string)
        # Episode counts as success when the final distance is <= 7 steps.
        eval_stats[env.goal_string].append(float(distance_to_goal[-1] <= 7))
      # Python 2: iteritems()/values() return concrete lists here.
      eval_stats = {k: np.mean(v) for k, v in eval_stats.iteritems()}
      eval_stats['mean'] = np.mean(eval_stats.values())
      print eval_stats
      feed_dict = {summary_op[1][c]: eval_stats[c] for c in eval_stats}
      summary_str = sess.run(summary_op[0], feed_dict=feed_dict)
      writer = sv.summary_writer
      writer.add_summary(summary_str, checkpoint_step)
      writer.flush()
def train():
  """Runs supervised training of the policy with slim.learning.train."""
  _, _, task, policy = init(FLAGS.sequence_length, None, TRAIN_WORLDS)
  print(FLAGS.save_summaries_secs)
  print(FLAGS.save_interval_secs)
  print(FLAGS.logdir)
  # Place variables on parameter servers when running distributed.
  device_setter = tf.train.replica_device_setter(
      ps_tasks=FLAGS.ps_tasks, merge_devices=True)
  with tf.device(device_setter):
    train_op, init_fn = create_train_and_init_ops(policy=policy, task=task)
    print(FLAGS.logdir)
    session_config = tf.ConfigProto(allow_soft_placement=True)
    slim.learning.train(
        train_op=train_op,
        init_fn=init_fn,
        logdir=FLAGS.logdir,
        is_chief=FLAGS.task_id == 0,
        number_of_steps=FLAGS.train_iters,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        session_config=session_config,
    )
def main(_):
  """Parses gin configuration and dispatches to train() or test()."""
  gin.parse_config_files_and_bindings(FLAGS.gin_config, FLAGS.gin_params)
  if FLAGS.mode != 'train':
    test()
  else:
    train()


if __name__ == '__main__':
  app.run(main)
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# blaze build -c opt train_supervised_active_vision
# bazel build -c opt --config=cuda --copt=-mavx train_supervised_active_vision && \
# Launches supervised training of the active-vision navigation policy.
# Only the object-detection ('det') modality is fed to the policy; the
# dataset location is injected through the gin binding below.
# NOTE(review): logdir and dataset_root are user-specific absolute paths —
# adjust before running outside the original author's environment.
bazel-bin/research/cognitive_planning/train_supervised_active_vision \
  --mode='train' \
  --logdir=/usr/local/google/home/kosecka/local_avd_train/ \
  --modality_types='det' \
  --batch_size=8 \
  --train_iters=200000 \
  --lstm_cell_size=2048 \
  --policy_fc_size=2048 \
  --sequence_length=20 \
  --max_eval_episode_length=100 \
  --test_iters=194 \
  --gin_config=envs/configs/active_vision_config.gin \
  --gin_params='ActiveVisionDatasetEnv.dataset_root="/cns/jn-d/home/kosecka/AVD_Minimal/"' \
  --logtostderr
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A set of functions that are used for visualization.
These functions often receive an image, perform some visualization on the image.
The functions do not return a value, instead they modify the image itself.
"""
import collections
import functools
# Set headless-friendly backend.
import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements
import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import six
import tensorflow as tf
import standard_fields as fields
# Pixel margins reserved when a title is drawn on an image.
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
# Palette of PIL-recognized color names used to color detection boxes.
# A class id is mapped to a color via `class_id % len(STANDARD_COLORS)`.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]
def save_image_array_as_png(image, output_path):
  """Saves an image (represented as a numpy array) to PNG.

  Args:
    image: a numpy array with shape [height, width, 3].
    output_path: path to which image should be written.
  """
  pil_img = Image.fromarray(np.uint8(image)).convert('RGB')
  # tf.gfile handles both local and remote filesystems.
  with tf.gfile.Open(output_path, 'w') as fid:
    pil_img.save(fid, 'PNG')
def encode_image_array_as_png_str(image):
  """Encodes a numpy array into a PNG string.

  Args:
    image: a numpy array with shape [height, width, 3].

  Returns:
    PNG encoded image string.
  """
  buf = six.BytesIO()
  Image.fromarray(np.uint8(image)).save(buf, format='PNG')
  encoded = buf.getvalue()
  buf.close()
  return encoded
def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):
  """Adds a bounding box to an image (numpy array).

  Converts the array to PIL, delegates the drawing to
  draw_bounding_box_on_image, and copies the result back in place.

  Args:
    image: a numpy array with shape [height, width, 3].
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  rgb_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_bounding_box_on_image(rgb_pil, ymin, xmin, ymax, xmax, color,
                             thickness, display_str_list,
                             use_normalized_coordinates)
  np.copyto(image, np.array(rgb_pil))
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
  """Adds a bounding box to an image.

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Args:
    image: a PIL.Image object.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  # Closed polyline tracing the four box edges.
  draw.line([(left, top), (left, bottom), (right, bottom),
             (right, top), (left, top)], width=thickness, fill=color)
  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:
    # Fall back to PIL's built-in bitmap font if arial is not installed.
    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    # Filled background rectangle behind the label text.
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                          text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)
    # NOTE(review): moves up by (text_height - 2 * margin) per line; a '+'
    # might have been intended to include the margins, but this matches the
    # upstream visualization code — confirm before changing.
    text_bottom -= text_height - 2 * margin
def draw_bounding_boxes_on_image_array(image,
                                       boxes,
                                       color='red',
                                       thickness=4,
                                       display_str_list_list=()):
  """Draws bounding boxes on image (numpy array).

  Args:
    image: a numpy array object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
      The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings; one list of strings per
      bounding box (a box may carry multiple labels).

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  as_pil = Image.fromarray(image)
  draw_bounding_boxes_on_image(as_pil, boxes, color, thickness,
                               display_str_list_list)
  # Write the drawn result back into the caller's array.
  np.copyto(image, np.array(as_pil))
def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 color='red',
                                 thickness=4,
                                 display_str_list_list=()):
  """Draws bounding boxes on image.

  Args:
    image: a PIL.Image object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
      The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings; one list of strings per
      bounding box (a box may carry multiple labels).

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  shape = boxes.shape
  # A scalar array has an empty shape tuple; nothing to draw.
  if not shape:
    return
  if len(shape) != 2 or shape[1] != 4:
    raise ValueError('Input must be of size [N, 4]')
  for idx in range(shape[0]):
    strs = display_str_list_list[idx] if display_str_list_list else ()
    draw_bounding_box_on_image(image, boxes[idx, 0], boxes[idx, 1],
                               boxes[idx, 2], boxes[idx, 3], color, thickness,
                               strs)
def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs):
  """Thin py_func-compatible wrapper: boxes only, no masks or keypoints."""
  kwargs['category_index'] = category_index
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, **kwargs)
def _visualize_boxes_and_masks(image, boxes, classes, scores, masks,
                               category_index, **kwargs):
  """Thin py_func-compatible wrapper: boxes plus instance masks."""
  kwargs['category_index'] = category_index
  kwargs['instance_masks'] = masks
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, **kwargs)
def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints,
                                   category_index, **kwargs):
  """Thin py_func-compatible wrapper: boxes plus keypoints."""
  kwargs['category_index'] = category_index
  kwargs['keypoints'] = keypoints
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, **kwargs)
def _visualize_boxes_and_masks_and_keypoints(
    image, boxes, classes, scores, masks, keypoints, category_index, **kwargs):
  """Thin py_func-compatible wrapper: boxes, instance masks and keypoints."""
  kwargs['category_index'] = category_index
  kwargs['instance_masks'] = masks
  kwargs['keypoints'] = keypoints
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, **kwargs)
def draw_bounding_boxes_on_image_tensors(images,
                                         boxes,
                                         classes,
                                         scores,
                                         category_index,
                                         instance_masks=None,
                                         keypoints=None,
                                         max_boxes_to_draw=20,
                                         min_score_thresh=0.2,
                                         use_normalized_coordinates=True):
  """Draws bounding boxes, masks, and keypoints on batch of image tensors.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored.
    boxes: [N, max_detections, 4] float32 tensor of detection boxes.
    classes: [N, max_detections] int tensor of detection classes. Note that
      classes are 1-indexed.
    scores: [N, max_detections] float32 tensor of detection scores.
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
      instance masks.
    keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
      with keypoints.
    max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
    min_score_thresh: Minimum score threshold for visualization. Default 0.2.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    4D image tensor of type uint8, with boxes drawn on top.
  """
  # Additional channels are being ignored.
  images = images[:, :, :, 0:3]
  visualization_keyword_args = {
      'use_normalized_coordinates': use_normalized_coordinates,
      'max_boxes_to_draw': max_boxes_to_draw,
      'min_score_thresh': min_score_thresh,
      'agnostic_mode': False,
      'line_thickness': 4
  }
  # Select the per-image visualization function whose positional signature
  # matches the optional tensors provided; `elems` must line up positionally
  # with that function's arguments, since tf.map_fn unpacks them in order.
  if instance_masks is not None and keypoints is None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_masks,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores, instance_masks]
  elif instance_masks is None and keypoints is not None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_keypoints,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores, keypoints]
  elif instance_masks is not None and keypoints is not None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_masks_and_keypoints,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores, instance_masks, keypoints]
  else:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores]

  def draw_boxes(image_and_detections):
    """Draws boxes on image."""
    # py_func escapes the graph so the PIL-based drawing code can run.
    image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections,
                                  tf.uint8)
    return image_with_boxes

  images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
  return images
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
  """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to
    detections, while the subimage on the right corresponds to groundtruth.
  """
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()
  # Optional tensors: only fed through when present in eval_dict. A batch
  # dimension of size 1 is added since the drawing helper expects batches.
  instance_masks = None
  if detection_fields.detection_masks in eval_dict:
    instance_masks = tf.cast(
        tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0),
        tf.uint8)
  keypoints = None
  if detection_fields.detection_keypoints in eval_dict:
    keypoints = tf.expand_dims(
        eval_dict[detection_fields.detection_keypoints], axis=0)
  groundtruth_instance_masks = None
  if input_data_fields.groundtruth_instance_masks in eval_dict:
    groundtruth_instance_masks = tf.cast(
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_instance_masks], axis=0),
        tf.uint8)
  # Left subimage: model detections filtered by score threshold.
  images_with_detections = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0),
      category_index,
      instance_masks=instance_masks,
      keypoints=keypoints,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh,
      use_normalized_coordinates=use_normalized_coordinates)
  # Right subimage: groundtruth boxes with dummy score 1.0 so every box is
  # drawn (threshold 0.0, no box-count limit).
  images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes], axis=0),
      tf.expand_dims(
          tf.ones_like(
              eval_dict[input_data_fields.groundtruth_classes],
              dtype=tf.float32),
          axis=0),
      category_index,
      instance_masks=groundtruth_instance_masks,
      keypoints=None,
      max_boxes_to_draw=None,
      min_score_thresh=0.0,
      use_normalized_coordinates=use_normalized_coordinates)
  # Concatenate along width: detections left, groundtruth right.
  return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  color='red',
                                  radius=2,
                                  use_normalized_coordinates=True):
  """Draws keypoints on an image (numpy array).

  Args:
    image: a numpy array with shape [height, width, 3].
    keypoints: a numpy array with shape [num_keypoints, 2].
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image. Otherwise treat them as absolute.
  """
  rgb_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_keypoints_on_image(rgb_pil, keypoints, color, radius,
                          use_normalized_coordinates)
  # Write the drawn result back into the caller's array.
  np.copyto(image, np.array(rgb_pil))
def draw_keypoints_on_image(image,
                            keypoints,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True):
  """Draws keypoints on an image.

  Args:
    image: a PIL.Image object.
    keypoints: a numpy array with shape [num_keypoints, 2].
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image. Otherwise treat them as absolute.
  """
  draw = ImageDraw.Draw(image)
  width, height = image.size
  # Keypoints are stored (y, x).
  ys = [pt[0] for pt in keypoints]
  xs = [pt[1] for pt in keypoints]
  if use_normalized_coordinates:
    xs = tuple([width * x for x in xs])
    ys = tuple([height * y for y in ys])
  for x, y in zip(xs, ys):
    # Each keypoint is rendered as a filled circle of the given radius.
    draw.ellipse([(x - radius, y - radius), (x + radius, y + radius)],
                 outline=color, fill=color)
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
  """Draws mask on an image.

  Args:
    image: uint8 numpy array with shape (img_height, img_height, 3)
    mask: a uint8 numpy array of shape (img_height, img_height) with
      values between either 0 or 1.
    color: color to draw the keypoints with. Default is red.
    alpha: transparency value between 0 and 1. (default: 0.4)

  Raises:
    ValueError: On incorrect data type for image or masks.
  """
  # Validate before touching PIL; the order of checks (image dtype, mask
  # dtype, mask values, shape match) is part of the observable behavior.
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  if np.any(np.logical_and(mask != 1, mask != 0)):
    raise ValueError('`mask` elements should be in [0, 1]')
  if image.shape[:2] != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (image.shape[:2], mask.shape))
  rgb = ImageColor.getrgb(color)
  base = Image.fromarray(image)
  # Solid-color plane the same spatial size as the mask.
  color_plane = np.expand_dims(
      np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
  overlay = Image.fromarray(np.uint8(color_plane)).convert('RGBA')
  # Alpha channel: opaque (255*alpha) where the mask is 1, fully
  # transparent elsewhere.
  blend_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
  composed = Image.composite(overlay, base, blend_mask)
  np.copyto(image, np.array(composed.convert('RGB')))
def visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    category_index,
    instance_masks=None,
    instance_boundaries=None,
    keypoints=None,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=.5,
    agnostic_mode=False,
    line_thickness=4,
    groundtruth_box_visualization_color='black',
    skip_scores=False,
    skip_labels=False):
  """Overlay labeled boxes on an image with formatted scores and label names.

  This function groups boxes that correspond to the same location
  and creates a display string for each detection and overlays these
  on the image. Note that this function modifies the image in place, and
  returns that same image.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4]
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None. If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plot all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
      category index `id` and category name `name`) keyed by category indices.
    instance_masks: a numpy array of shape [N, image_height, image_width] with
      values ranging between 0 and 1, can be None.
    instance_boundaries: a numpy array of shape [N, image_height, image_width]
      with values ranging between 0 and 1, can be None.
    keypoints: a numpy array of shape [N, num_keypoints, 2], can
      be None
    use_normalized_coordinates: whether boxes is to be interpreted as
      normalized coordinates or not.
    max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
      all boxes.
    min_score_thresh: minimum score threshold for a box to be visualized
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not. This mode will display scores but ignore
      classes.
    line_thickness: integer (default: 4) controlling line width of the boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes
    skip_scores: whether to skip score when drawing a single detection
    skip_labels: whether to skip label when drawing a single detection

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3) with overlaid
    boxes.
  """
  # Create a display string (and color) for every box location, group any boxes
  # that correspond to the same location.
  box_to_display_str_map = collections.defaultdict(list)
  box_to_color_map = collections.defaultdict(str)
  box_to_instance_masks_map = {}
  box_to_instance_boundaries_map = {}
  box_to_keypoints_map = collections.defaultdict(list)
  if not max_boxes_to_draw:
    max_boxes_to_draw = boxes.shape[0]
  for i in range(min(max_boxes_to_draw, boxes.shape[0])):
    if scores is None or scores[i] > min_score_thresh:
      # Box coordinates as a tuple serve as the grouping key; identical
      # locations from different detections collapse to one drawn box.
      box = tuple(boxes[i].tolist())
      if instance_masks is not None:
        box_to_instance_masks_map[box] = instance_masks[i]
      if instance_boundaries is not None:
        box_to_instance_boundaries_map[box] = instance_boundaries[i]
      if keypoints is not None:
        box_to_keypoints_map[box].extend(keypoints[i])
      if scores is None:
        # Groundtruth mode: fixed color, no label/score strings.
        box_to_color_map[box] = groundtruth_box_visualization_color
      else:
        display_str = ''
        if not skip_labels:
          if not agnostic_mode:
            if classes[i] in category_index.keys():
              class_name = category_index[classes[i]]['name']
            else:
              class_name = 'N/A'
            display_str = str(class_name)
        if not skip_scores:
          if not display_str:
            display_str = '{}%'.format(int(100*scores[i]))
          else:
            display_str = '{}: {}%'.format(display_str, int(100*scores[i]))
        box_to_display_str_map[box].append(display_str)
        if agnostic_mode:
          box_to_color_map[box] = 'DarkOrange'
        else:
          # Deterministic color per class id.
          box_to_color_map[box] = STANDARD_COLORS[
              classes[i] % len(STANDARD_COLORS)]

  # Draw all boxes onto image.
  for box, color in box_to_color_map.items():
    ymin, xmin, ymax, xmax = box
    if instance_masks is not None:
      draw_mask_on_image_array(
          image,
          box_to_instance_masks_map[box],
          color=color
      )
    if instance_boundaries is not None:
      # Boundaries are always drawn fully opaque in red.
      draw_mask_on_image_array(
          image,
          box_to_instance_boundaries_map[box],
          color='red',
          alpha=1.0
      )
    draw_bounding_box_on_image_array(
        image,
        ymin,
        xmin,
        ymax,
        xmax,
        color=color,
        thickness=line_thickness,
        display_str_list=box_to_display_str_map[box],
        use_normalized_coordinates=use_normalized_coordinates)
    if keypoints is not None:
      draw_keypoints_on_image_array(
          image,
          box_to_keypoints_map[box],
          color=color,
          radius=line_thickness / 2,
          use_normalized_coordinates=use_normalized_coordinates)

  return image
def add_cdf_image_summary(values, name):
  """Adds a tf.summary.image for a CDF plot of the values.

  Normalizes `values` such that they sum to 1, plots the cumulative
  distribution function and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    name: name for the image summary.
  """

  def cdf_plot(values):
    """Numpy function that renders the CDF plot as a [1, H, W, 3] array."""
    normalized_values = values / np.sum(values)
    sorted_values = np.sort(normalized_values)
    cumulative_values = np.cumsum(sorted_values)
    fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
                            / cumulative_values.size)
    fig = plt.figure(frameon=False)
    # Integer subplot spec; the string form '111' is deprecated.
    ax = fig.add_subplot(111)
    ax.plot(fraction_of_examples, cumulative_values)
    ax.set_ylabel('cumulative normalized values')
    ax.set_xlabel('fraction of examples')
    fig.canvas.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    # np.frombuffer replaces the deprecated np.fromstring; identical result
    # for raw uint8 RGB canvas data.
    image = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
        1, int(height), int(width), 3)
    return image

  # Distinct name so the plotting function above is not shadowed by the op.
  cdf_plot_op = tf.py_func(cdf_plot, [values], tf.uint8)
  tf.summary.image(name, cdf_plot_op)
def add_hist_image_summary(values, bins, name):
  """Adds a tf.summary.image for a histogram plot of the values.

  Plots the histogram of values and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    bins: bin edges which will be directly passed to np.histogram.
    name: name for the image summary.
  """

  def hist_plot(values, bins):
    """Numpy function that renders the histogram as a [1, H, W, 3] array."""
    fig = plt.figure(frameon=False)
    # Integer subplot spec; the string form '111' is deprecated.
    ax = fig.add_subplot(111)
    y, x = np.histogram(values, bins=bins)
    # np.histogram returns len(x) == len(y) + 1 bin edges; plot against the
    # left edges.
    ax.plot(x[:-1], y)
    ax.set_ylabel('count')
    ax.set_xlabel('value')
    fig.canvas.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    # np.frombuffer replaces the deprecated np.fromstring; identical result
    # for raw uint8 RGB canvas data.
    image = np.frombuffer(
        fig.canvas.tostring_rgb(), dtype='uint8').reshape(
            1, int(height), int(width), 3)
    return image

  # Distinct name so the plotting function above is not shadowed by the op.
  hist_plot_op = tf.py_func(hist_plot, [values, bins], tf.uint8)
  tf.summary.image(name, hist_plot_op)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Initializes at random location and visualizes the optimal path.
Different modes of execution:
1) benchmark: It generates benchmark_iter sample trajectory to random goals
and plots the histogram of path lengths. It can be also used to see how fast
it runs.
2) vis: It visualizes the generated paths by image, semantic segmentation, and
so on.
3) human: allows the user to navigate through environment from keyboard input.
python viz_active_vision_dataset_main -- \
--mode=benchmark --benchmark_iter=1000 --gin_config=envs/configs/active_vision_config.gin
python viz_active_vision_dataset_main -- \
--mode=vis \
--gin_config=envs/configs/active_vision_config.gin
python viz_active_vision_dataset_main -- \
--mode=human \
--gin_config=envs/configs/active_vision_config.gin
python viz_active_vision_dataset_main.py --mode=eval --eval_folder=/usr/local/google/home/$USER/checkin_log_det/evals/ --output_folder=/usr/local/google/home/$USER/test_imgs/ --gin_config=envs/configs/active_vision_config.gin
"""
import matplotlib
# pylint: disable=g-import-not-at-top
# Need Tk for interactive plots.
matplotlib.use('TkAgg')
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
import os
from pyglib import app
from pyglib import flags
import gin
import cv2
from envs import active_vision_dataset_env
from envs import task_env
# Execution modes selectable via --mode (see module docstring).
VIS_MODE = 'vis'
HUMAN_MODE = 'human'
BENCHMARK_MODE = 'benchmark'
GRAPH_MODE = 'graph'
EVAL_MODE = 'eval'
flags.DEFINE_enum('mode', VIS_MODE,
                  [VIS_MODE, HUMAN_MODE, BENCHMARK_MODE, GRAPH_MODE, EVAL_MODE],
                  'mode of the execution')
flags.DEFINE_integer('benchmark_iter', 1000,
                     'number of iterations for benchmarking')
flags.DEFINE_string('eval_folder', '', 'the path to the eval folder')
flags.DEFINE_string('output_folder', '',
                    'the path to which the images and gifs are written')
flags.DEFINE_multi_string('gin_config', [],
                          'List of paths to a gin config files for the env.')
flags.DEFINE_multi_string('gin_params', [],
                          'Newline separated list of Gin parameter bindings.')
# Short alias for the modality-type enum used throughout this script.
mt = task_env.ModalityTypes
FLAGS = flags.FLAGS
def benchmark(env, targets):
  """Benchmarks the speed of sequence generation by env.

  Draws FLAGS.benchmark_iter random step sequences from the environment,
  accumulates action-frequency and episode-length statistics, and plots a
  histogram of episode lengths per goal class.

  Args:
    env: environment.
    targets: list of target classes.
  """
  # Episode lengths keyed by goal class index.
  episode_lengths = {}
  # Per-world set of (start_image_id, goal, episode_length) tuples observed.
  all_init_configs = {}
  # Mean per-episode selection frequency of each action over all iterations.
  all_actions = dict([(a, 0.) for a in env.actions])
  for i in range(FLAGS.benchmark_iter):
    path, actions, _, _ = env.random_step_sequence()
    # actions is one-hot per step; recover the chosen action indices.
    selected_actions = np.argmax(actions, axis=-1)
    new_actions = dict([(a, 0.) for a in env.actions])
    for a in selected_actions:
      # Frequency of each action within this episode.
      new_actions[env.actions[a]] += 1. / selected_actions.shape[0]
    for a in new_actions:
      # Running mean of per-episode frequencies across iterations.
      all_actions[a] += new_actions[a] / FLAGS.benchmark_iter
    start_image_id, world, goal = env.get_init_config(path)
    print world
    if world not in all_init_configs:
      all_init_configs[world] = set()
    all_init_configs[world].add((start_image_id, goal, len(actions)))
    if env.goal_index not in episode_lengths:
      episode_lengths[env.goal_index] = []
    episode_lengths[env.goal_index].append(len(actions))
  # One histogram subplot per goal class in a 2x3 grid (assumes <= 6 classes).
  for i, cls in enumerate(episode_lengths):
    plt.subplot(231 + i)
    plt.hist(episode_lengths[cls])
    plt.title(targets[cls])
  plt.show()
def human(env, targets):
  """Lets user play around the env manually.

  Runs up to 20 interactive episodes. Each step renders the current
  observation modalities with matplotlib and reads the next action from
  keyboard input (a/d/w/s to translate, j/l to rotate, n to stop).

  Args:
    env: environment.
    targets: list of target classes (used for the goal subplot title).
  """
  # Keyboard key -> action name (string action interface of the env).
  string_key_map = {
      'a': 'left',
      'd': 'right',
      'w': 'forward',
      's': 'backward',
      'j': 'rotate_ccw',
      'l': 'rotate_cw',
      'n': 'stop'
  }
  # Keyboard key -> action index (integer action interface of the env).
  integer_key_map = {
      'a': env.actions.index('left'),
      'd': env.actions.index('right'),
      'w': env.actions.index('forward'),
      's': env.actions.index('backward'),
      'j': env.actions.index('rotate_ccw'),
      'l': env.actions.index('rotate_cw'),
      'n': env.actions.index('stop')
  }
  for k in integer_key_map:
    integer_key_map[k] = np.int32(integer_key_map[k])
  plt.ion()
  for _ in range(20):
    obs = env.reset()
    steps = -1
    action = None
    while True:
      print 'distance = ', obs[task_env.ModalityTypes.DISTANCE]
      steps += 1
      # DEPTH modality carries the depth value in channel 0 and a second
      # channel used as a mask.
      depth_value = obs[task_env.ModalityTypes.DEPTH][:, :, 0]
      depth_mask = obs[task_env.ModalityTypes.DEPTH][:, :, 1]
      seg_mask = np.squeeze(obs[task_env.ModalityTypes.SEMANTIC_SEGMENTATION])
      # Detection modality is per-class scores; display the argmax class map.
      det_mask = np.argmax(
          obs[task_env.ModalityTypes.OBJECT_DETECTION], axis=-1)
      img = obs[task_env.ModalityTypes.IMAGE]
      plt.subplot(231)
      plt.title('steps = {}'.format(steps))
      plt.imshow(img.astype(np.uint8))
      plt.subplot(232)
      plt.imshow(depth_value)
      plt.title('depth value')
      plt.subplot(233)
      plt.imshow(depth_mask)
      plt.title('depth mask')
      plt.subplot(234)
      plt.imshow(seg_mask)
      plt.title('seg')
      plt.subplot(235)
      plt.imshow(det_mask)
      plt.title('det')
      plt.subplot(236)
      plt.title('goal={}'.format(targets[env.goal_index]))
      plt.draw()
      while True:
        s = raw_input('key = ')
        # Randomly alternates between the string and integer action
        # encodings -- presumably to exercise both env.step interfaces;
        # TODO confirm this is intentional.
        if np.random.rand() > 0.5:
          key_map = string_key_map
        else:
          key_map = integer_key_map
        if s in key_map:
          action = key_map[s]
          break
        else:
          print 'invalid action'
      print 'action = {}'.format(action)
      # NOTE(review): only matches the *string* 'stop'; when the integer map
      # was chosen, a stop action is passed to env.step below instead.
      if action == 'stop':
        # path_to_goal includes the current and goal nodes; subtract 2 for
        # the number of remaining steps.
        print 'dist to goal: {}'.format(len(env.path_to_goal()) - 2)
        break
      obs, reward, done, info = env.step(action)
      print 'reward = {}, done = {}, success = {}'.format(
          reward, done, info['success'])
def visualize_random_step_sequence(env):
  """Visualizes random sequence of steps.

  Draws up to 20 random step sequences (max 30 steps each) from the env and
  renders the observation modalities of every step with matplotlib.

  Args:
    env: environment.
  """
  plt.ion()
  for _ in range(20):
    path, actions, _, step_outputs = env.random_step_sequence(max_len=30)
    print 'path = {}'.format(path)
    for action, step_output in zip(actions, step_outputs):
      obs, _, done, _ = step_output
      # DEPTH modality: channel 0 is the depth value, channel 1 a mask.
      depth_value = obs[task_env.ModalityTypes.DEPTH][:, :, 0]
      depth_mask = obs[task_env.ModalityTypes.DEPTH][:, :, 1]
      seg_mask = np.squeeze(obs[task_env.ModalityTypes.SEMANTIC_SEGMENTATION])
      # Reduce per-class detection scores to an argmax class map.
      det_mask = np.argmax(
          obs[task_env.ModalityTypes.OBJECT_DETECTION], axis=-1)
      img = obs[task_env.ModalityTypes.IMAGE]
      plt.subplot(231)
      plt.imshow(img.astype(np.uint8))
      plt.subplot(232)
      plt.imshow(depth_value)
      plt.title('depth value')
      plt.subplot(233)
      plt.imshow(depth_mask)
      plt.title('depth mask')
      plt.subplot(234)
      plt.imshow(seg_mask)
      plt.title('seg')
      plt.subplot(235)
      plt.imshow(det_mask)
      plt.title('det')
      plt.subplot(236)
      print 'action = {}'.format(action)
      print 'done = {}'.format(done)
      plt.draw()
      if raw_input('press \'n\' to go to the next random sequence. Otherwise, '
                   'press any key to continue...') == 'n':
        break
def visualize(env, input_folder, output_root_folder):
"""visualizes images for sequence of steps from the evals folder."""
def which_env(file_name):
img_name = file_name.split('_')[0][2:5]
env_dict = {'161': 'Home_016_1', '131': 'Home_013_1', '111': 'Home_011_1'}
if img_name in env_dict:
return env_dict[img_name]
else:
raise ValueError('could not resolve env: {} {}'.format(
img_name, file_name))
def which_goal(file_name):
return file_name[file_name.find('_')+1:]
output_images_folder = os.path.join(output_root_folder, 'images')
output_gifs_folder = os.path.join(output_root_folder, 'gifs')
if not tf.gfile.IsDirectory(output_images_folder):
tf.gfile.MakeDirs(output_images_folder)
if not tf.gfile.IsDirectory(output_gifs_folder):
tf.gfile.MakeDirs(output_gifs_folder)
npy_files = [
os.path.join(input_folder, name)
for name in tf.gfile.ListDirectory(input_folder)
if name.find('npy') >= 0
]
for i, npy_file in enumerate(npy_files):
print 'saving images {}/{}'.format(i, len(npy_files))
pure_name = npy_file[npy_file.rfind('/') + 1:-4]
output_folder = os.path.join(output_images_folder, pure_name)
if not tf.gfile.IsDirectory(output_folder):
tf.gfile.MakeDirs(output_folder)
print '*******'
print pure_name[0:pure_name.find('_')]
env.reset_for_eval(which_env(pure_name),
which_goal(pure_name),
pure_name[0:pure_name.find('_')],
)
with tf.gfile.Open(npy_file) as h:
states = np.load(h).item()['states']
images = [
env.observation(state)[mt.IMAGE] for state in states
]
for j, img in enumerate(images):
cv2.imwrite(os.path.join(output_folder, '{0:03d}'.format(j) + '.jpg'),
img[:, :, ::-1])
print 'converting to gif'
os.system(
'convert -set delay 20 -colors 256 -dispose 1 {}/*.jpg {}.gif'.format(
output_folder,
os.path.join(output_gifs_folder, pure_name + '.gif')
)
)
def evaluate_folder(env, folder_path):
"""Evaluates the performance from the evals folder."""
targets = ['fridge', 'dining_table', 'microwave', 'tv', 'couch']
def compute_acc(npy_file):
with tf.gfile.Open(npy_file) as h:
data = np.load(h).item()
if npy_file.find('dining_table') >= 0:
category = 'dining_table'
else:
category = npy_file[npy_file.rfind('_') + 1:-4]
return category, data['distance'][-1] - 2
def evaluate_iteration(folder):
"""Evaluates the data from the folder of certain eval iteration."""
print folder
npy_files = [
os.path.join(folder, name)
for name in tf.gfile.ListDirectory(folder)
if name.find('npy') >= 0
]
eval_stats = {c: [] for c in targets}
for npy_file in npy_files:
try:
category, dist = compute_acc(npy_file)
except: # pylint: disable=bare-except
continue
eval_stats[category].append(float(dist <= 5))
for c in eval_stats:
if not eval_stats[c]:
print 'incomplete eval {}: empty class {}'.format(folder_path, c)
return None
eval_stats[c] = np.mean(eval_stats[c])
eval_stats['mean'] = np.mean(eval_stats.values())
return eval_stats
checkpoint_folders = [
folder_path + x
for x in tf.gfile.ListDirectory(folder_path)
if tf.gfile.IsDirectory(folder_path + x)
]
print '{} folders found'.format(len(checkpoint_folders))
print '------------------------'
all_iters = []
all_accs = []
for i, folder in enumerate(checkpoint_folders):
print 'processing {}/{}'.format(i, len(checkpoint_folders))
eval_stats = evaluate_iteration(folder)
if eval_stats is None:
continue
else:
iter_no = int(folder[folder.rfind('/') + 1:])
print 'result ', iter_no, eval_stats['mean']
all_accs.append(eval_stats['mean'])
all_iters.append(iter_no)
all_accs = np.asarray(all_accs)
all_iters = np.asarray(all_iters)
idx = np.argmax(all_accs)
print 'best result at iteration {} was {}'.format(all_iters[idx],
all_accs[idx])
order = np.argsort(all_iters)
all_iters = all_iters[order]
all_accs = all_accs[order]
#plt.plot(all_iters, all_accs)
#plt.show()
#print 'done plotting'
best_iteration_folder = os.path.join(folder_path, str(all_iters[idx]))
print 'generating gifs and images for {}'.format(best_iteration_folder)
visualize(env, best_iteration_folder, FLAGS.output_folder)
def main(_):
  """Parses gin configs, builds the env, and dispatches on FLAGS.mode."""
  gin.parse_config_files_and_bindings(FLAGS.gin_config, FLAGS.gin_params)
  # Echo the effective configuration for easier debugging of runs.
  print('********')
  print(FLAGS.mode)
  print(FLAGS.gin_config)
  print(FLAGS.gin_params)
  modalities = [
      task_env.ModalityTypes.IMAGE,
      task_env.ModalityTypes.SEMANTIC_SEGMENTATION,
      task_env.ModalityTypes.OBJECT_DETECTION,
      task_env.ModalityTypes.DEPTH,
      task_env.ModalityTypes.DISTANCE,
  ]
  env = active_vision_dataset_env.ActiveVisionDatasetEnv(
      modality_types=modalities)

  def run_graph_mode():
    # Sanity-check the scene graph of every world.
    for world in env.worlds:
      env.check_scene_graph(world, 'fridge')

  # Mode name -> thunk executing that mode.
  dispatch = {
      BENCHMARK_MODE: lambda: benchmark(env, env.possible_targets),
      GRAPH_MODE: run_graph_mode,
      HUMAN_MODE: lambda: human(env, env.possible_targets),
      VIS_MODE: lambda: visualize_random_step_sequence(env),
      EVAL_MODE: lambda: evaluate_folder(env, FLAGS.eval_folder),
  }
  if FLAGS.mode in dispatch:
    dispatch[FLAGS.mode]()
# Script entry point: app.run parses command-line flags and invokes main.
if __name__ == '__main__':
  app.run(main)
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Compression with Neural Networks
This is a [TensorFlow](http://www.tensorflow.org/) model repo containing
research on compression with neural networks. This repo currently contains
code for the following papers:
[Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148)
## Organization
[Image Encoder](image_encoder/): Encoding and decoding images into their binary representation.
[Entropy Coder](entropy_coder/): Lossless compression of the binary representation.
## Contact Info
Model repository maintained by Nick Johnston ([nmjohn](https://github.com/nmjohn)).
# Neural net based entropy coding
This is a [TensorFlow](http://www.tensorflow.org/) model for additional
lossless compression of bitstreams generated by neural net based image
encoders as described in
[https://arxiv.org/abs/1703.10114](https://arxiv.org/abs/1703.10114).
To be more specific, the entropy coder aims at compressing further binary
codes which have a 3D tensor structure with:
* the first two dimensions of the tensors corresponding to the height and
the width of the binary codes,
* the last dimension being the depth of the codes. The last dimension can be
sliced into N groups of K, where each additional group is used by the image
decoder to add more details to the reconstructed image.
The code in this directory only contains the underlying code probability model
but does not perform the actual compression using arithmetic coding.
The code probability model is enough to compute the theoretical compression
ratio.
## Prerequisites
The only software requirement for running the encoder and decoder is having
TensorFlow installed.
You will also need to add the top level source directory of the entropy coder
to your `PYTHONPATH`, for example:
`export PYTHONPATH=${PYTHONPATH}:/tmp/models/compression`
## Training the entropy coder
### Synthetic dataset
If you do not have a training dataset, there is a simple code generative model
that you can use to generate a dataset and play with the entropy coder.
The generative model is located under dataset/gen\_synthetic\_dataset.py. Note
that this simple generative model is not going to give good results on real
images as it is not supposed to be close to the statistics of the binary
representation of encoded images. Consider it as a toy dataset, no more, no
less.
To generate a synthetic dataset with 20000 samples:
`mkdir -p /tmp/dataset`
`python ./dataset/gen_synthetic_dataset.py --dataset_dir=/tmp/dataset/
--count=20000`
Note that the generator has not been optimized at all, generating the synthetic
dataset is currently pretty slow.
### Training
If you just want to play with the entropy coder trainer, here is the command
line that can be used to train the entropy coder on the synthetic dataset:
`mkdir -p /tmp/entropy_coder_train`
`python ./core/entropy_coder_train.py --task=0
--train_dir=/tmp/entropy_coder_train/
--model=progressive
--model_config=./configs/synthetic/model_config.json
--train_config=./configs/synthetic/train_config.json
--input_config=./configs/synthetic/input_config.json
`
Training is configured using 3 files formatted using JSON:
* One file is used to configure the underlying entropy coder model.
Currently, only the *progressive* model is supported.
This model takes 2 mandatory parameters and an optional one:
* `layer_depth`: the number of bits per layer (a.k.a. iteration).
Background: the image decoder takes each layer to add more detail
to the image.
* `layer_count`: the maximum number of layers that should be supported
by the model. This should be equal or greater than the maximum number
of layers in the input binary codes.
* `coded_layer_count`: This can be used to consider only partial codes,
keeping only the first `coded_layer_count` layers and ignoring the
remaining layers. If left empty, the binary codes are left unchanged.
* One file to configure the training, including the learning rate, ...
  The meanings of the parameters are pretty straightforward. Note that this
file is only used during training and is not needed during inference.
* One file to specify the input dataset to use during training.
The dataset is formatted using tf.RecordIO.
## Inference: file size after entropy coding.
### Using a synthetic sample
Here is the command line to generate a single synthetic sample formatted
in the same way as what is provided by the image encoder:
`python ./dataset/gen_synthetic_single.py
--sample_filename=/tmp/dataset/sample_0000.npz`
To actually compute the additional compression ratio using the entropy coder
trained in the previous step:
`python ./core/entropy_coder_single.py
--model=progressive
--model_config=./configs/synthetic/model_config.json
--input_codes=/tmp/dataset/sample_0000.npz
--checkpoint=/tmp/entropy_coder_train/model.ckpt-209078`
where the checkpoint number should be adjusted accordingly.
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import and register all the entropy coder models."""
# pylint: disable=unused-import
from entropy_coder.progressive import progressive
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Basic test of all registered models."""
import tensorflow as tf
# pylint: disable=unused-import
import all_models
# pylint: enable=unused-import
from entropy_coder.model import model_factory
class AllModelsTest(tf.test.TestCase):
  """Smoke test: every registered entropy coder model builds a graph."""

  def testBuildModelForTraining(self):
    # Iterate over all registered models so that newly registered models are
    # covered automatically without editing this test.
    factory = model_factory.GetModelRegistry()
    model_names = factory.GetAvailableModels()
    for m in model_names:
      # Fresh graph per model so models do not share variables/ops.
      tf.reset_default_graph()
      global_step = tf.Variable(tf.zeros([], dtype=tf.int64),
                                trainable=False,
                                name='global_step')
      optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
      batch_size = 3
      height = 40
      width = 20
      depth = 5
      binary_codes = tf.placeholder(dtype=tf.float32,
                                    shape=[batch_size, height, width, depth])
      # Create a model with the default configuration.
      print('Creating model: {}'.format(m))
      model = factory.CreateModel(m)
      model.Initialize(global_step,
                       optimizer,
                       model.GetConfigStringForUnitTest())
      # Before BuildGraph, no loss/train_op/metrics should exist yet.
      self.assertTrue(model.loss is None, 'model: {}'.format(m))
      self.assertTrue(model.train_op is None, 'model: {}'.format(m))
      self.assertTrue(model.average_code_length is None, 'model: {}'.format(m))
      # Build the Tensorflow graph corresponding to the model.
      model.BuildGraph(binary_codes)
      self.assertTrue(model.loss is not None, 'model: {}'.format(m))
      self.assertTrue(model.average_code_length is not None,
                      'model: {}'.format(m))
      # Some models may legitimately be inference-only (no train_op).
      if model.train_op is None:
        print('Model {} is not trainable'.format(m))
# Test entry point.
if __name__ == '__main__':
  tf.test.main()
{
"data": "/tmp/dataset/synthetic_dataset",
"unique_code_size": true
}
{
"batch_size": 4,
"learning_rate": 0.1,
"decay_rate": 0.9,
"samples_per_decay": 20000
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment