Add environments for efficient-hrl

d28dee0f · ofirnachum · bf8c050a · d28dee0f · d28dee0f · d28dee0f
Commit d28dee0f authored Jun 25, 2018 by ofirnachum
8 changed files
--- a/research/efficient-hrl/README.md
+++ b/research/efficient-hrl/README.md
+Code for performing Hierarchical RL based on
+"Data-Efficient Hierarchical Reinforcement Learning" by
+Ofir Nachum, Shixiang (Shane) Gu, Honglak Lee, and Sergey Levine
+(https://arxiv.org/abs/1805.08296).
+
+
+This library currently includes three of the environments used:
+Ant Maze, Ant Push, and Ant Fall.
+
+The training code is planned to be open-sourced at a later time.
+
+
+Requirements:
+* TensorFlow (see http://www.tensorflow.org for how to install/upgrade)
+* OpenAI Gym (see http://gym.openai.com/docs, be sure to install MuJoCo as well)
+* NumPy (see http://www.numpy.org/)
+
+
+Quick Start:
+
+Run a random policy on AntMaze (or AntPush, AntFall) from the `research/efficient-hrl` directory:
+
+```
+python environments/__init__.py --env=AntMaze
+```
+
+
+Maintained by Ofir Nachum (ofirnachum).
--- a/research/efficient-hrl/environments/__init__.py
+++ b/research/efficient-hrl/environments/__init__.py
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Random policy on an environment."""
+
+import tensorflow as tf
+import numpy as np
+import random
+
+import create_maze_env
+
+# Short aliases for the TensorFlow app, flags and logging modules.
+app = tf.app
+flags = tf.flags
+logging = tf.logging
+
+FLAGS = flags.FLAGS
+
+# Command-line flags; main() forwards their values to run_environment().
+flags.DEFINE_string('env', 'AntMaze', 'environment name: AntMaze, AntPush, or AntFall')
+flags.DEFINE_integer('episode_length', 500, 'episode length')
+flags.DEFINE_integer('num_episodes', 50, 'number of episodes')
+
+
+def get_goal_sample_fn(env_name):
+  """Returns a zero-argument function that produces a goal for `env_name`.
+
+  Args:
+    env_name: One of 'AntMaze', 'AntPush' or 'AntFall'.
+
+  Returns:
+    A callable taking no arguments and returning the goal as a NumPy array
+    (two entries for AntMaze and AntPush, three for AntFall).  The AntMaze
+    goal is drawn uniformly from [-4, 20) in each coordinate; the other two
+    goals are fixed.
+
+  Raises:
+    AssertionError: If `env_name` is not one of the names above.
+  """
+  if env_name == 'AntMaze':
+    # NOTE: When evaluating (i.e. for the metrics shown in the paper), we use
+    # the commented-out goal sampling function.  The uncommented one is only
+    # used for training.
+    #return lambda: np.array([0., 16.])
+    return lambda: np.random.uniform((-4, -4), (20, 20))
+  elif env_name == 'AntPush':
+    return lambda: np.array([0., 19.])
+  elif env_name == 'AntFall':
+    return lambda: np.array([0., 27., 4.5])
+  else:
+    # Raised explicitly (rather than via `assert False`) so that the check is
+    # not stripped when Python runs with -O.
+    raise AssertionError('Unknown env')
+
+
+def get_reward_fn(env_name):
+  """Returns the reward function for `env_name`.
+
+  The reward is the negative Euclidean distance between the leading entries of
+  the observation and the goal: the first two entries for AntMaze and AntPush,
+  the first three for AntFall.
+
+  Args:
+    env_name: One of 'AntMaze', 'AntPush' or 'AntFall'.
+
+  Returns:
+    A callable `(obs, goal) -> reward`.
+
+  Raises:
+    AssertionError: If `env_name` is not one of the names above.
+  """
+  if env_name in ('AntMaze', 'AntPush'):
+    goal_dim = 2
+  elif env_name == 'AntFall':
+    goal_dim = 3
+  else:
+    # Raised explicitly (rather than via `assert False`) so that the check is
+    # not stripped when Python runs with -O.
+    raise AssertionError('Unknown env')
+  # `**` binds tighter than unary minus, so this is -(sum of squares ** 0.5).
+  return lambda obs, goal: -np.sum(np.square(obs[:goal_dim] - goal)) ** 0.5
+
+
+def success_fn(last_reward):
+  """Returns whether `last_reward` is strictly greater than -5.0."""
+  threshold = -5.0
+  return last_reward > threshold
+
+
+class EnvWithGoal(object):
+  """Wraps an environment so that observations carry a goal.
+
+  A new goal is sampled on every reset() and appended to each observation;
+  the reward of the wrapped environment is replaced by the goal-based reward
+  for the given environment name.
+  """
+
+  def __init__(self, base_env, env_name):
+    """Stores `base_env` and looks up the goal sampler and reward function."""
+    self.base_env = base_env
+    self.goal_sample_fn = get_goal_sample_fn(env_name)
+    self.reward_fn = get_reward_fn(env_name)
+    self.goal = None
+
+  def reset(self):
+    """Resets the wrapped environment, then samples a fresh goal."""
+    base_obs = self.base_env.reset()
+    self.goal = self.goal_sample_fn()
+    return np.concatenate([base_obs, self.goal])
+
+  def step(self, a):
+    """Steps the wrapped environment and scores the result against the goal."""
+    base_obs, _, done, info = self.base_env.step(a)
+    goal_reward = self.reward_fn(base_obs, self.goal)
+    goal_obs = np.concatenate([base_obs, self.goal])
+    return goal_obs, goal_reward, done, info
+
+  @property
+  def action_space(self):
+    """Action space of the wrapped environment."""
+    return self.base_env.action_space
+
+
+def run_environment(env_name, episode_length, num_episodes):
+  """Runs a uniformly random policy and logs per-episode and average results.
+
+  Args:
+    env_name: Environment name passed to create_maze_env() and EnvWithGoal.
+    episode_length: Maximum number of steps per episode.
+    num_episodes: Number of episodes to run.
+  """
+  base_env = create_maze_env.create_maze_env(env_name)
+  env = EnvWithGoal(base_env, env_name)
+
+  def action_fn(obs):
+    # Uniform sample within the action bounds; the observation is ignored.
+    space = env.action_space
+    center = (space.low + space.high) / 2.0
+    half_range = (space.high - space.low) / 2.0
+    noise = np.random.uniform(low=-1.0, high=1.0, size=space.shape)
+    return center + half_range * noise
+
+  rewards = []
+  successes = []
+  for ep in range(num_episodes):
+    episode_reward = 0.0
+    episode_success = False
+    obs = env.reset()
+    for _ in range(episode_length):
+      obs, reward, done, _ = env.step(action_fn(obs))
+      episode_reward += reward
+      # Success is judged on the most recent step only.
+      episode_success = success_fn(reward)
+      if done:
+        break
+    rewards.append(episode_reward)
+    successes.append(episode_success)
+    logging.info('Episode %d reward: %.2f, Success: %d',
+                 ep + 1, episode_reward, episode_success)
+
+  logging.info('Average Reward over %d episodes: %.2f',
+               num_episodes, np.mean(rewards))
+  logging.info('Average Success over %d episodes: %.2f',
+               num_episodes, np.mean(successes))
+
+
+def main(unused_argv):
+  """Enables INFO logging and runs the random policy configured by FLAGS."""
+  logging.set_verbosity(logging.INFO)
+  run_environment(
+      env_name=FLAGS.env,
+      episode_length=FLAGS.episode_length,
+      num_episodes=FLAGS.num_episodes)
+
+
+if __name__ == '__main__':
+  app.run()
--- a/research/efficient-hrl/environments/ant.py
+++ b/research/efficient-hrl/environments/ant.py
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper for creating the ant environment in gym_mujoco."""
+
+import math
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+  """Ant environment built on gym's MujocoEnv.
+
+  Observations are assembled in _get_obs() from the first 15 qpos and first 14
+  qvel entries, optionally followed by the centers of mass (and their
+  velocities) of named bodies.
+  """
+  # File name of the model XML for this environment.
+  FILE = "ant.xml"
+
+  def __init__(self, file_path=None, expose_all_qpos=True,
+               expose_body_coms=None, expose_body_comvels=None):
+    """Stores the observation options and initialises the base classes.
+
+    Args:
+      file_path: Model path handed to MujocoEnv.__init__.
+      expose_all_qpos: If true, observations include qpos[:15]; otherwise
+        the first two qpos entries are dropped (qpos[2:15]).
+      expose_body_coms: Optional iterable of body names whose
+        get_body_com() values are appended to the observation.
+      expose_body_comvels: Optional iterable of body names whose
+        get_body_comvel() values are appended to the observation.
+    """
+    self._expose_all_qpos = expose_all_qpos
+    self._expose_body_coms = expose_body_coms
+    self._expose_body_comvels = expose_body_comvels
+    # Body name -> index range of that body's entries within the observation;
+    # filled in lazily by _get_obs().
+    self._body_com_indices = {}
+    self._body_comvel_indices = {}
+
+    # NOTE(review): the second argument is presumably gym's frame_skip
+    # (self.frame_skip is used in step()) -- confirm against MujocoEnv.
+    mujoco_env.MujocoEnv.__init__(self, file_path, 5)
+    utils.EzPickle.__init__(self)
+
+  @property
+  def physics(self):
+    """Object whose `.data.qpos` / `.data.qvel` are read in _get_obs()."""
+    # NOTE(review): relies on `self.model` exposing `.data`; confirm this
+    # matches the installed mujoco-py version.
+    return self.model
+
+  def _step(self, a):
+    """Alias that forwards to step()."""
+    return self.step(a)
+
+  def step(self, a):
+    """Simulates action `a` and returns (ob, reward, done, info).
+
+    The reward is the torso's x displacement divided by self.dt, minus
+    0.5 * sum(a ** 2), plus a constant 1.0.  `done` is always False.
+    """
+    xposbefore = self.get_body_com("torso")[0]
+    self.do_simulation(a, self.frame_skip)
+    xposafter = self.get_body_com("torso")[0]
+    forward_reward = (xposafter - xposbefore) / self.dt
+    ctrl_cost = .5 * np.square(a).sum()
+    survive_reward = 1.0
+    reward = forward_reward - ctrl_cost + survive_reward
+    # NOTE(review): `state` is never used below.
+    state = self.state_vector()
+    done = False
+    ob = self._get_obs()
+    return ob, reward, done, dict(
+        reward_forward=forward_reward,
+        reward_ctrl=-ctrl_cost,
+        reward_survive=survive_reward)
+
+  def _get_obs(self):
+    """Builds the observation vector from qpos, qvel and optional body data."""
+    # No cfrc observation
+    if self._expose_all_qpos:
+      obs = np.concatenate([
+          self.physics.data.qpos.flat[:15],  # Ensures only ant obs.
+          self.physics.data.qvel.flat[:14],
+      ])
+    else:
+      obs = np.concatenate([
+          self.physics.data.qpos.flat[2:15],
+          self.physics.data.qvel.flat[:14],
+      ])
+
+    if self._expose_body_coms is not None:
+      for name in self._expose_body_coms:
+        com = self.get_body_com(name)
+        # Record where this body's entries sit in the observation the first
+        # time the body is seen.
+        if name not in self._body_com_indices:
+          indices = range(len(obs), len(obs) + len(com))
+          self._body_com_indices[name] = indices
+        obs = np.concatenate([obs, com])
+
+    if self._expose_body_comvels is not None:
+      for name in self._expose_body_comvels:
+        comvel = self.get_body_comvel(name)
+        # Same bookkeeping as above, for the velocity entries.
+        if name not in self._body_comvel_indices:
+          indices = range(len(obs), len(obs) + len(comvel))
+          self._body_comvel_indices[name] = indices
+        obs = np.concatenate([obs, comvel])
+    return obs
+
+  def reset_model(self):
+    """Resets to a perturbed initial state and returns the observation.
+
+    Uniform noise in [-0.1, 0.1] is added to the initial qpos and Gaussian
+    noise scaled by 0.1 to the initial qvel.
+    """
+    qpos = self.init_qpos + self.np_random.uniform(
+        size=self.model.nq, low=-.1, high=.1)
+    qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
+
+    # Set everything other than ant to original position and 0 velocity.
+    qpos[15:] = self.init_qpos[15:]
+    qvel[14:] = 0.
+    self.set_state(qpos, qvel)
+    return self._get_obs()
+
+  def viewer_setup(self):
+    """Sets the camera distance to half of the model's stat.extent."""
+    self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/research/efficient-hrl/environments/ant_maze_env.py
+++ b/research/efficient-hrl/environments/ant_maze_env.py
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from maze_env import MazeEnv
+from ant import AntEnv
+
+
+class AntMazeEnv(MazeEnv):
+  """Maze environment whose wrapped model class is AntEnv."""
+  MODEL_CLASS = AntEnv
--- a/research/efficient-hrl/environments/assets/ant.xml
+++ b/research/efficient-hrl/environments/assets/ant.xml
+<mujoco model="ant">
+  <compiler inertiafromgeom="true" angle="degree" coordinate="local" />
+  <option timestep="0.02" integrator="RK4" />
+  <custom>
+    <numeric name="init_qpos" data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" />
+  </custom>
+  <default>
+    <joint limited="true" armature="1" damping="1" />
+    <geom condim="3" conaffinity="0" margin="0.01" friction="1 0.5 0.5" solref=".02 1" solimp=".8 .8 .01" rgba="0.8 0.6 0.4 1" density="5.0" />
+  </default>
+  <asset>
+    <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
+    <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
+    <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
+    <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1"  reflectance="0.5" />
+    <material name='geom' texture="texgeom" texuniform="true" />
+  </asset>
+  <worldbody>
+    <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
+    <geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' />
+    <body name="torso" pos="0 0 0.75">
+      <geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
+      <joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />
+      <body name="front_left_leg" pos="0 0 0">
+        <geom name="aux_1_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 0.2 0.0" />
+        <body name="aux_1" pos="0.2 0.2 0">
+          <joint name="hip_1" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="left_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 0.2 0.0" />
+          <body pos="0.2 0.2 0">
+            <joint name="ankle_1" type="hinge" pos="0.0 0.0 0.0" axis="-1 1 0" range="30 70" />
+            <geom name="left_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.4 0.4 0.0" />
+          </body>
+        </body>
+      </body>
+      <body name="front_right_leg" pos="0 0 0">
+        <geom name="aux_2_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 0.2 0.0" />
+        <body name="aux_2" pos="-0.2 0.2 0">
+          <joint name="hip_2" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="right_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 0.2 0.0" />
+          <body pos="-0.2 0.2 0">
+            <joint name="ankle_2" type="hinge" pos="0.0 0.0 0.0" axis="1 1 0" range="-70 -30" />
+            <geom name="right_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.4 0.4 0.0" />
+          </body>
+        </body>
+      </body>
+      <body name="back_leg" pos="0 0 0">
+        <geom name="aux_3_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" />
+        <body name="aux_3" pos="-0.2 -0.2 0">
+          <joint name="hip_3" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="back_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" />
+          <body pos="-0.2 -0.2 0">
+            <joint name="ankle_3" type="hinge" pos="0.0 0.0 0.0" axis="-1 1 0" range="-70 -30" />
+            <geom name="third_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" />
+          </body>
+        </body>
+      </body>
+      <body name="right_back_leg" pos="0 0 0">
+        <geom name="aux_4_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 -0.2 0.0" />
+        <body name="aux_4" pos="0.2 -0.2 0">
+          <joint name="hip_4" type="hinge" pos="0.0 0.0 0.0" axis="0 0 1" range="-30 30" />
+          <geom name="rightback_leg_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.2 -0.2 0.0" />
+          <body pos="0.2 -0.2 0">
+            <joint name="ankle_4" type="hinge" pos="0.0 0.0 0.0" axis="1 1 0" range="30 70" />
+            <geom name="fourth_ankle_geom" type="capsule" size="0.08" fromto="0.0 0.0 0.0 0.4 -0.4 0.0" />
+          </body>
+        </body>
+      </body>
+    </body>
+
+  </worldbody>
+  <actuator>
+    <motor joint="hip_4" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_4" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="hip_1" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_1" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="hip_2" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_2" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="hip_3" ctrlrange="-30.0 30.0" ctrllimited="true" />
+    <motor joint="ankle_3" ctrlrange="-30.0 30.0" ctrllimited="true" />
+  </actuator>
+</mujoco>
--- a/research/efficient-hrl/environments/create_maze_env.py
+++ b/research/efficient-hrl/environments/create_maze_env.py
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from ant_maze_env import AntMazeEnv
+
+
+def create_maze_env(env_name=None):
+  """Creates the ant maze environment selected by the prefix of `env_name`.
+
+  Args:
+    env_name: Environment name.  Names starting with 'AntMaze', 'AntPush' or
+      'AntFall' select the 'Maze', 'Push' or 'Fall' maze respectively.
+
+  Returns:
+    An AntMazeEnv constructed with the matching maze_id.
+
+  Raises:
+    ValueError: If `env_name` is None or starts with none of the known
+      prefixes.
+  """
+  maze_ids_by_prefix = (
+      ('AntMaze', 'Maze'),
+      ('AntPush', 'Push'),
+      ('AntFall', 'Fall'),
+  )
+  # The default of None previously crashed on `.startswith`; treat it like any
+  # other unknown name instead.
+  if env_name is not None:
+    for prefix, maze_id in maze_ids_by_prefix:
+      if env_name.startswith(prefix):
+        return AntMazeEnv(maze_id=maze_id)
+
+  raise ValueError('Unknown maze environment %s' % env_name)
--- a/research/efficient-hrl/environments/maze_env.py
+++ b/research/efficient-hrl/environments/maze_env.py
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adapted from rllab maze_env.py."""
+
+import os
+import tempfile
+import xml.etree.ElementTree as ET
+import math
+import numpy as np
+import gym
+
+import maze_env_utils
+
+# Directory that contains mujoco xml files.  Resolved relative to this file
+# rather than the current working directory, so the environments can be
+# constructed regardless of where the process was started.
+MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets')
+
+
+class MazeEnv(gym.Env):
+  """Gym environment that places a MuJoCo model inside a generated maze.
+
+  Subclasses set MODEL_CLASS to the wrapped environment class.  On construction
+  the model's XML file (MODEL_CLASS.FILE inside MODEL_DIR) is parsed, geoms and
+  bodies for the maze cells are appended to its <worldbody>, the result is
+  written to a temporary file, and MODEL_CLASS is instantiated from that file.
+
+  Maze cells use these values: 'r' is the robot start, 1 an unmovable block,
+  -1 a gap that makes the whole maze elevated, and the maze_env_utils.Move
+  codes mark movable blocks.
+  """
+  MODEL_CLASS = None
+
+  MAZE_HEIGHT = None
+  MAZE_SIZE_SCALING = None
+
+  def __init__(
+      self,
+      maze_id=None,
+      maze_height=0.5,
+      maze_size_scaling=8,
+      *args,
+      **kwargs):
+    """Builds the maze XML and instantiates the wrapped model from it.
+
+    Args:
+      maze_id: Maze identifier passed to maze_env_utils.construct_maze().
+      maze_height: Block height, in units of `maze_size_scaling`.
+      maze_size_scaling: Side length of one maze cell.
+      *args: Extra positional arguments forwarded to MODEL_CLASS.
+      **kwargs: Extra keyword arguments forwarded to MODEL_CLASS.
+
+    Raises:
+      ValueError: If the subclass did not set MODEL_CLASS.
+      Exception: If a geom under the torso body has no name.
+    """
+    self._maze_id = maze_id
+    # Step counter reported (scaled) in observations.  Initialised here so that
+    # _get_obs(), observation_space and step() also work before reset().
+    self.t = 0
+
+    model_cls = self.__class__.MODEL_CLASS
+    if model_cls is None:
+      # Raising a bare string is itself a TypeError; raise a real exception.
+      raise ValueError("MODEL_CLASS unspecified!")
+    xml_path = os.path.join(MODEL_DIR, model_cls.FILE)
+    tree = ET.parse(xml_path)
+    worldbody = tree.find(".//worldbody")
+
+    self.MAZE_HEIGHT = height = maze_height
+    self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
+    self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze(maze_id=self._maze_id)
+    self.elevated = any(-1 in row for row in structure)  # Elevate the maze to allow for falling.
+    self.blocks = any(
+        any(maze_env_utils.can_move(r) for r in row)
+        for row in structure)  # Are there any movable blocks?
+
+    # _find_robot() reads MAZE_STRUCTURE and MAZE_SIZE_SCALING set just above.
+    torso_x, torso_y = self._find_robot()
+    self._init_torso_x = torso_x
+    self._init_torso_y = torso_y
+
+    height_offset = 0.
+    if self.elevated:
+      # Increase initial z-pos of ant.
+      height_offset = height * size_scaling
+      torso = tree.find(".//body[@name='torso']")
+      torso.set('pos', '0 0 %.2f' % (0.75 + height_offset))
+    if self.blocks:
+      # If there are movable blocks, change simulation settings to perform
+      # better contact detection.
+      default = tree.find(".//default")
+      default.find('.//geom').set('solimp', '.995 .995 .01')
+
+    for i in range(len(structure)):
+      for j in range(len(structure[0])):
+        if self.elevated and structure[i][j] != -1:
+          # Create elevated platform.
+          ET.SubElement(
+              worldbody, "geom",
+              name="elevated_%d_%d" % (i, j),
+              pos="%f %f %f" % (j * size_scaling - torso_x,
+                                i * size_scaling - torso_y,
+                                height / 2 * size_scaling),
+              size="%f %f %f" % (0.5 * size_scaling,
+                                 0.5 * size_scaling,
+                                 height / 2 * size_scaling),
+              type="box",
+              material="",
+              contype="1",
+              conaffinity="1",
+              rgba="0.9 0.9 0.9 1",
+          )
+        if structure[i][j] == 1:  # Unmovable block.
+          # Offset all coordinates so that robot starts at the origin.
+          ET.SubElement(
+              worldbody, "geom",
+              name="block_%d_%d" % (i, j),
+              pos="%f %f %f" % (j * size_scaling - torso_x,
+                                i * size_scaling - torso_y,
+                                height_offset +
+                                height / 2 * size_scaling),
+              size="%f %f %f" % (0.5 * size_scaling,
+                                 0.5 * size_scaling,
+                                 height / 2 * size_scaling),
+              type="box",
+              material="",
+              contype="1",
+              conaffinity="1",
+              rgba="0.4 0.4 0.4 1",
+          )
+        elif maze_env_utils.can_move(structure[i][j]):  # Movable block.
+          # The "falling" blocks are shrunk slightly and increased in mass to
+          # ensure that it can fall easily through a gap in the platform blocks.
+          falling = maze_env_utils.can_move_z(structure[i][j])
+          shrink = 0.99 if falling else 1.0
+          moveable_body = ET.SubElement(
+              worldbody, "body",
+              name="moveable_%d_%d" % (i, j),
+              pos="%f %f %f" % (j * size_scaling - torso_x,
+                                i * size_scaling - torso_y,
+                                height_offset +
+                                height / 2 * size_scaling),
+          )
+          ET.SubElement(
+              moveable_body, "geom",
+              name="block_%d_%d" % (i, j),
+              pos="0 0 0",
+              size="%f %f %f" % (0.5 * size_scaling * shrink,
+                                 0.5 * size_scaling * shrink,
+                                 height / 2 * size_scaling),
+              type="box",
+              material="",
+              mass="0.001" if falling else "0.0002",
+              contype="1",
+              conaffinity="1",
+              rgba="0.9 0.1 0.1 1"
+          )
+          # One slide joint per axis along which the block may move.
+          if maze_env_utils.can_move_x(structure[i][j]):
+            ET.SubElement(
+                moveable_body, "joint",
+                armature="0",
+                axis="1 0 0",
+                damping="0.0",
+                limited="true" if falling else "false",
+                range="%f %f" % (-size_scaling, size_scaling),
+                margin="0.01",
+                name="moveable_x_%d_%d" % (i, j),
+                pos="0 0 0",
+                type="slide"
+            )
+          if maze_env_utils.can_move_y(structure[i][j]):
+            ET.SubElement(
+                moveable_body, "joint",
+                armature="0",
+                axis="0 1 0",
+                damping="0.0",
+                limited="true" if falling else "false",
+                range="%f %f" % (-size_scaling, size_scaling),
+                margin="0.01",
+                name="moveable_y_%d_%d" % (i, j),
+                pos="0 0 0",
+                type="slide"
+            )
+          if maze_env_utils.can_move_z(structure[i][j]):
+            # Vertical travel is limited to dropping by the platform height.
+            ET.SubElement(
+                moveable_body, "joint",
+                armature="0",
+                axis="0 0 1",
+                damping="0.0",
+                limited="true",
+                range="%f 0" % (-height_offset),
+                margin="0.01",
+                name="moveable_z_%d_%d" % (i, j),
+                pos="0 0 0",
+                type="slide"
+            )
+
+    torso = tree.find(".//body[@name='torso']")
+    geoms = torso.findall(".//geom")
+    for geom in geoms:
+      if 'name' not in geom.attrib:
+        raise Exception("Every geom of the torso must have a name "
+                        "defined")
+
+    # mkstemp() returns an already-open descriptor; close it so it does not
+    # leak, then let ElementTree write to the path.
+    fd, file_path = tempfile.mkstemp(text=True)
+    os.close(fd)
+    tree.write(file_path)
+
+    self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs)
+
+  def _get_obs(self):
+    """Returns the wrapped observation with the scaled step count appended."""
+    return np.concatenate([self.wrapped_env._get_obs(),
+                           [self.t * 0.001]])
+
+  def reset(self):
+    """Resets the step counter and the wrapped environment."""
+    self.t = 0
+    self.wrapped_env.reset()
+    return self._get_obs()
+
+  @property
+  def viewer(self):
+    """Viewer of the wrapped environment."""
+    return self.wrapped_env.viewer
+
+  def render(self, *args, **kwargs):
+    """Forwards to the wrapped environment's render()."""
+    return self.wrapped_env.render(*args, **kwargs)
+
+  @property
+  def observation_space(self):
+    """Unbounded Box with the shape of the current observation."""
+    shape = self._get_obs().shape
+    high = np.inf * np.ones(shape)
+    low = -high
+    return gym.spaces.Box(low, high)
+
+  @property
+  def action_space(self):
+    """Action space of the wrapped environment."""
+    return self.wrapped_env.action_space
+
+  def _find_robot(self):
+    """Returns the scaled (x, y) of the maze cell marked 'r'.
+
+    Raises:
+      AssertionError: If no cell of the maze is marked 'r'.
+    """
+    structure = self.MAZE_STRUCTURE
+    size_scaling = self.MAZE_SIZE_SCALING
+    for i in range(len(structure)):
+      for j in range(len(structure[0])):
+        if structure[i][j] == 'r':
+          return j * size_scaling, i * size_scaling
+    # Raised explicitly (rather than via `assert False`) so that the check is
+    # not stripped when Python runs with -O.
+    raise AssertionError('No robot in maze specification.')
+
+  def step(self, action):
+    """Steps the wrapped environment; `done` is always reported as False."""
+    self.t += 1
+    _, inner_reward, _, info = self.wrapped_env.step(action)
+    next_obs = self._get_obs()
+    done = False
+    return next_obs, inner_reward, done, info
--- a/research/efficient-hrl/environments/maze_env_utils.py
+++ b/research/efficient-hrl/environments/maze_env_utils.py
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adapted from rllab maze_env_utils.py."""
+import numpy as np
+import math
+
+
+class Move(object):
+  """Integer codes for the axes along which a maze block may move."""
+  # Consecutive codes 11..17, in this order.
+  X, Y, Z, XY, XZ, YZ, XYZ = range(11, 18)
+
+
+def can_move_x(movable):
+  """Returns True if `movable` is a Move code that includes the x axis."""
+  x_codes = (Move.X, Move.XY, Move.XZ, Move.XYZ)
+  return movable in x_codes
+
+
+def can_move_y(movable):
+  """Returns True if `movable` is a Move code that includes the y axis."""
+  y_codes = (Move.Y, Move.XY, Move.YZ, Move.XYZ)
+  return movable in y_codes
+
+
+def can_move_z(movable):
+  """Returns True if `movable` is a Move code that includes the z axis."""
+  z_codes = (Move.Z, Move.XZ, Move.YZ, Move.XYZ)
+  return movable in z_codes
+
+
+def can_move(movable):
+  """Returns True if `movable` can move along at least one axis."""
+  axis_checks = (can_move_x, can_move_y, can_move_z)
+  return any(check(movable) for check in axis_checks)
+
+
+def construct_maze(maze_id='Maze'):
+  """Returns the grid (list of rows) describing the maze named `maze_id`.
+
+  Cells hold 1, 0, -1, 'r' or a Move code.
+
+  Args:
+    maze_id: One of 'Maze', 'Push' or 'Fall'.
+
+  Raises:
+    NotImplementedError: If `maze_id` is not one of the names above.
+  """
+  if maze_id == 'Maze':
+    return [
+        [1, 1, 1, 1, 1],
+        [1, 'r', 0, 0, 1],
+        [1, 1, 1, 0, 1],
+        [1, 0, 0, 0, 1],
+        [1, 1, 1, 1, 1],
+    ]
+
+  if maze_id == 'Push':
+    return [
+        [1, 1, 1, 1, 1],
+        [1, 0, 'r', 1, 1],
+        [1, 0, Move.XY, 0, 1],
+        [1, 1, 0, 1, 1],
+        [1, 1, 1, 1, 1],
+    ]
+
+  if maze_id == 'Fall':
+    return [
+        [1, 1, 1, 1],
+        [1, 'r', 0, 1],
+        [1, 0, Move.YZ, 1],
+        [1, -1, -1, 1],
+        [1, 0, 0, 1],
+        [1, 1, 1, 1],
+    ]
+
+  raise NotImplementedError('The provided MazeId %s is not recognized' % maze_id)