Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
d28dee0f
Commit
d28dee0f
authored
Jun 25, 2018
by
ofirnachum
Browse files
Add environments for efficient-hrl
parent
bf8c050a
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
702 additions
and
0 deletions
+702
-0
research/efficient-hrl/README.md
research/efficient-hrl/README.md
+28
-0
research/efficient-hrl/environments/__init__.py
research/efficient-hrl/environments/__init__.py
+129
-0
research/efficient-hrl/environments/ant.py
research/efficient-hrl/environments/ant.py
+103
-0
research/efficient-hrl/environments/ant_maze_env.py
research/efficient-hrl/environments/ant_maze_env.py
+21
-0
research/efficient-hrl/environments/assets/ant.xml
research/efficient-hrl/environments/assets/ant.xml
+81
-0
research/efficient-hrl/environments/create_maze_env.py
research/efficient-hrl/environments/create_maze_env.py
+30
-0
research/efficient-hrl/environments/maze_env.py
research/efficient-hrl/environments/maze_env.py
+234
-0
research/efficient-hrl/environments/maze_env_utils.py
research/efficient-hrl/environments/maze_env_utils.py
+76
-0
No files found.
research/efficient-hrl/README.md
0 → 100755
View file @
d28dee0f
Code for performing Hierarchical RL based on
"Data-Efficient Hierarchical Reinforcement Learning" by
Ofir Nachum, Shixiang (Shane) Gu, Honglak Lee, and Sergey Levine
(https://arxiv.org/abs/1805.08296).
This library currently includes three of the environments used:
Ant Maze, Ant Push, and Ant Fall.
The training code is planned to be open-sourced at a later time.
Requirements:
*
TensorFlow (see http://www.tensorflow.org for how to install/upgrade)
*
OpenAI Gym (see http://gym.openai.com/docs, be sure to install MuJoCo as well)
*
NumPy (see http://www.numpy.org/)
Quick Start:
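From the research/efficient-hrl directory (the maze assets are loaded from the relative path environments/assets), run a random policy on AntMaze (or AntPush, AntFall):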
```
python environments/__init__.py --env=AntMaze
```
Maintained by Ofir Nachum (ofirnachum).
research/efficient-hrl/environments/__init__.py
0 → 100755
View file @
d28dee0f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Random policy on an environment."""
import
tensorflow
as
tf
import
numpy
as
np
import
random
import
create_maze_env
# Short aliases for the TensorFlow helpers this script relies on.
app = tf.app
flags = tf.flags
logging = tf.logging

FLAGS = flags.FLAGS

# Command-line options controlling the random-policy rollout.
flags.DEFINE_string(
    'env', 'AntMaze', 'environment name: AntMaze, AntPush, or AntFall')
flags.DEFINE_integer('episode_length', 500, 'episode length')
flags.DEFINE_integer('num_episodes', 50, 'number of episodes')
def get_goal_sample_fn(env_name):
  """Returns a zero-argument callable that produces a goal for `env_name`.

  Args:
    env_name: One of 'AntMaze', 'AntPush' or 'AntFall'.

  Returns:
    A callable taking no arguments and returning a NumPy array: a point drawn
    uniformly from [-4, 20) x [-4, 20) for AntMaze, the fixed point (0, 19)
    for AntPush, or the fixed point (0, 27, 4.5) for AntFall.

  Raises:
    AssertionError: If `env_name` is none of the names above.
  """
  if env_name == 'AntMaze':
    # NOTE: When evaluating (i.e. for the metrics shown in the paper), the
    # fixed goal in the commented-out line below is used instead. The uniform
    # sampler that is actually returned is only used for training.
    # return lambda: np.array([0., 16.])
    def sample_maze_goal():
      return np.random.uniform((-4, -4), (20, 20))
    return sample_maze_goal

  if env_name == 'AntPush':
    def fixed_push_goal():
      return np.array([0., 19.])
    return fixed_push_goal

  if env_name == 'AntFall':
    def fixed_fall_goal():
      return np.array([0., 27., 4.5])
    return fixed_fall_goal

  assert False, 'Unknown env'
def get_reward_fn(env_name):
  """Returns the goal-reaching reward function for `env_name`.

  The reward is minus the Euclidean distance between the goal and the leading
  entries of the observation: the first two entries for AntMaze and AntPush,
  the first three for AntFall.

  Args:
    env_name: One of 'AntMaze', 'AntPush' or 'AntFall'.

  Returns:
    A callable `(obs, goal) -> reward`.

  Raises:
    AssertionError: If `env_name` is none of the names above.
  """
  def negative_distance(num_dims):
    # Reward compares only the first `num_dims` observation entries to goal.
    def reward_fn(obs, goal):
      squared_error = np.square(obs[:num_dims] - goal)
      return -(np.sum(squared_error) ** 0.5)
    return reward_fn

  if env_name == 'AntMaze':
    return negative_distance(2)
  if env_name == 'AntPush':
    return negative_distance(2)
  if env_name == 'AntFall':
    return negative_distance(3)
  assert False, 'Unknown env'
def success_fn(last_reward):
  """Reports whether a reward value counts as a success.

  Args:
    last_reward: Reward value to classify.

  Returns:
    True when `last_reward` is strictly greater than -5.0, False otherwise.
  """
  success_threshold = -5.0
  return last_reward > success_threshold
class EnvWithGoal(object):
  """Wraps an environment so observations carry a goal and rewards track it.

  A goal is sampled on every `reset()`. Observations returned by `reset()` and
  `step()` are the wrapped environment's observation with the current goal
  appended, and the reward is recomputed from the observation and the goal.
  """

  def __init__(self, base_env, env_name):
    """Stores the wrapped env and looks up goal/reward functions by name.

    Args:
      base_env: Environment to wrap; must provide `reset()`, `step(a)` and
        `action_space`.
      env_name: Name passed to `get_goal_sample_fn` and `get_reward_fn`.
    """
    self.base_env = base_env
    self.goal_sample_fn = get_goal_sample_fn(env_name)
    self.reward_fn = get_reward_fn(env_name)
    # No goal exists until the first reset().
    self.goal = None

  def reset(self):
    """Resets the wrapped env, samples a new goal, returns obs + goal."""
    base_obs = self.base_env.reset()
    self.goal = self.goal_sample_fn()
    return np.concatenate([base_obs, self.goal])

  def step(self, a):
    """Steps the wrapped env and returns (obs + goal, reward, done, info)."""
    # The wrapped environment's own reward is discarded in favour of the
    # goal-based reward.
    base_obs, _, done, info = self.base_env.step(a)
    goal_reward = self.reward_fn(base_obs, self.goal)
    augmented_obs = np.concatenate([base_obs, self.goal])
    return augmented_obs, goal_reward, done, info

  @property
  def action_space(self):
    """Action space of the wrapped environment."""
    return self.base_env.action_space
def run_environment(env_name, episode_length, num_episodes):
  """Rolls out a uniformly random policy and logs reward/success statistics.

  Args:
    env_name: Name of the environment to create and wrap with a goal.
    episode_length: Maximum number of steps per episode.
    num_episodes: Number of episodes to run.
  """
  env = EnvWithGoal(create_maze_env.create_maze_env(env_name), env_name)

  def action_fn(obs):
    # `obs` is ignored: the action is drawn uniformly between the action
    # space's low and high bounds.
    bounds = env.action_space
    center = (bounds.low + bounds.high) / 2.0
    half_range = (bounds.high - bounds.low) / 2.0
    noise = np.random.uniform(low=-1.0, high=1.0, size=bounds.shape)
    return center + half_range * noise

  rewards = []
  successes = []
  for ep in range(num_episodes):
    episode_reward = 0.0
    episode_success = False
    obs = env.reset()
    for unused_step in range(episode_length):
      obs, reward, done, unused_info = env.step(action_fn(obs))
      episode_reward += reward
      # Success reflects only the most recent step's reward.
      episode_success = success_fn(reward)
      if done:
        break
    rewards.append(episode_reward)
    successes.append(episode_success)

    logging.info('Episode %d reward: %.2f, Success: %d',
                 ep + 1, episode_reward, episode_success)

  logging.info('Average Reward over %d episodes: %.2f',
               num_episodes, np.mean(rewards))
  logging.info('Average Success over %d episodes: %.2f',
               num_episodes, np.mean(successes))
def main(unused_argv):
  """Script entry point: runs the rollout configured by the command line."""
  logging.set_verbosity(logging.INFO)
  run_environment(
      env_name=FLAGS.env,
      episode_length=FLAGS.episode_length,
      num_episodes=FLAGS.num_episodes)


if __name__ == '__main__':
  app.run()
research/efficient-hrl/environments/ant.py
0 → 100755
View file @
d28dee0f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrapper for creating the ant environment in gym_mujoco."""
import
math
import
numpy
as
np
from
gym
import
utils
from
gym.envs.mujoco
import
mujoco_env
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
  """MuJoCo ant environment with a configurable observation layout.

  The observation is built from joint positions and velocities, optionally
  followed by the centers of mass and center-of-mass velocities of named
  bodies. Episodes never terminate on their own (`done` is always False).
  """
  # File name of the MuJoCo model describing the ant.
  FILE = "ant.xml"

  def __init__(self, file_path=None, expose_all_qpos=True,
               expose_body_coms=None, expose_body_comvels=None):
    """Configures the observation layout and loads the model.

    Args:
      file_path: Path of the MuJoCo XML file handed to `MujocoEnv`.
      expose_all_qpos: If True the observation starts at qpos[0]; otherwise
        the first two qpos entries are left out.
      expose_body_coms: Optional iterable of body names whose center of mass
        is appended to the observation.
      expose_body_comvels: Optional iterable of body names whose
        center-of-mass velocity is appended to the observation.
    """
    # NOTE(review): these are assigned before the base constructor runs,
    # presumably because it already queries observations - confirm.
    self._expose_all_qpos = expose_all_qpos
    self._expose_body_coms = expose_body_coms
    self._expose_body_comvels = expose_body_comvels
    # Body name -> range of observation indices holding that body's values.
    self._body_com_indices = {}
    self._body_comvel_indices = {}

    # The second positional argument (5) is what `MujocoEnv` stores as the
    # frame skip used by step() - TODO confirm against the installed gym.
    mujoco_env.MujocoEnv.__init__(self, file_path, 5)
    utils.EzPickle.__init__(self)

  @property
  def physics(self):
    """The underlying MuJoCo model object."""
    return self.model

  def _step(self, a):
    """Alias of `step`, kept for callers that use the underscored name."""
    return self.step(a)

  def step(self, a):
    """Advances the simulation by one environment step.

    Args:
      a: Action applied for `self.frame_skip` simulation frames.

    Returns:
      A tuple `(ob, reward, done, info)`. `reward` is the torso's x-velocity
      minus half the squared action norm plus a constant 1.0; `done` is
      always False; `info` holds the three reward components.
    """
    xposbefore = self.get_body_com("torso")[0]
    self.do_simulation(a, self.frame_skip)
    xposafter = self.get_body_com("torso")[0]

    forward_reward = (xposafter - xposbefore) / self.dt
    ctrl_cost = .5 * np.square(a).sum()
    survive_reward = 1.0
    reward = forward_reward - ctrl_cost + survive_reward

    done = False
    ob = self._get_obs()
    return ob, reward, done, dict(
        reward_forward=forward_reward,
        reward_ctrl=-ctrl_cost,
        reward_survive=survive_reward)

  def _get_obs(self):
    """Builds the observation vector according to the constructor flags."""
    # No cfrc observation
    if self._expose_all_qpos:
      obs = np.concatenate([
          self.physics.data.qpos.flat[:15],  # Ensures only ant obs.
          self.physics.data.qvel.flat[:14],
      ])
    else:
      obs = np.concatenate([
          self.physics.data.qpos.flat[2:15],
          self.physics.data.qvel.flat[:14],
      ])

    if self._expose_body_coms is not None:
      for name in self._expose_body_coms:
        com = self.get_body_com(name)
        # Remember where this body's values sit the first time it is seen.
        if name not in self._body_com_indices:
          indices = range(len(obs), len(obs) + len(com))
          self._body_com_indices[name] = indices
        obs = np.concatenate([obs, com])

    if self._expose_body_comvels is not None:
      for name in self._expose_body_comvels:
        comvel = self.get_body_comvel(name)
        if name not in self._body_comvel_indices:
          indices = range(len(obs), len(obs) + len(comvel))
          self._body_comvel_indices[name] = indices
        obs = np.concatenate([obs, comvel])
    return obs

  def reset_model(self):
    """Resets the state with small random perturbations; returns the obs."""
    qpos = self.init_qpos + self.np_random.uniform(
        size=self.model.nq, low=-.1, high=.1)
    qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1

    # Set everything other than ant to original position and 0 velocity.
    qpos[15:] = self.init_qpos[15:]
    qvel[14:] = 0.
    self.set_state(qpos, qvel)
    return self._get_obs()

  def viewer_setup(self):
    """Sets the camera distance to half of the model's extent."""
    self.viewer.cam.distance = self.model.stat.extent * 0.5
research/efficient-hrl/environments/ant_maze_env.py
0 → 100755
View file @
d28dee0f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from
maze_env
import
MazeEnv
from
ant
import
AntEnv
class AntMazeEnv(MazeEnv):
  """Maze environment whose agent model is the ant (`AntEnv`)."""
  # Model class used for the agent placed in the maze.
  MODEL_CLASS = AntEnv
research/efficient-hrl/environments/assets/ant.xml
0 → 100755
View file @
d28dee0f
<mujoco
model=
"ant"
>
<compiler
inertiafromgeom=
"true"
angle=
"degree"
coordinate=
"local"
/>
<option
timestep=
"0.02"
integrator=
"RK4"
/>
<custom>
<numeric
name=
"init_qpos"
data=
"0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0"
/>
</custom>
<default>
<joint
limited=
"true"
armature=
"1"
damping=
"1"
/>
<geom
condim=
"3"
conaffinity=
"0"
margin=
"0.01"
friction=
"1 0.5 0.5"
solref=
".02 1"
solimp=
".8 .8 .01"
rgba=
"0.8 0.6 0.4 1"
density=
"5.0"
/>
</default>
<asset>
<texture
type=
"skybox"
builtin=
"gradient"
width=
"100"
height=
"100"
rgb1=
"1 1 1"
rgb2=
"0 0 0"
/>
<texture
name=
"texgeom"
type=
"cube"
builtin=
"flat"
mark=
"cross"
width=
"127"
height=
"1278"
rgb1=
"0.8 0.6 0.4"
rgb2=
"0.8 0.6 0.4"
markrgb=
"1 1 1"
random=
"0.01"
/>
<texture
name=
"texplane"
type=
"2d"
builtin=
"checker"
rgb1=
"0 0 0"
rgb2=
"0.8 0.8 0.8"
width=
"100"
height=
"100"
/>
<material
name=
'MatPlane'
texture=
"texplane"
shininess=
"1"
texrepeat=
"60 60"
specular=
"1"
reflectance=
"0.5"
/>
<material
name=
'geom'
texture=
"texgeom"
texuniform=
"true"
/>
</asset>
<worldbody>
<light
directional=
"true"
cutoff=
"100"
exponent=
"1"
diffuse=
"1 1 1"
specular=
".1 .1 .1"
pos=
"0 0 1.3"
dir=
"-0 0 -1.3"
/>
<geom
name=
'floor'
pos=
'0 0 0'
size=
'40 40 40'
type=
'plane'
conaffinity=
'1'
rgba=
'0.8 0.9 0.8 1'
condim=
'3'
/>
<body
name=
"torso"
pos=
"0 0 0.75"
>
<geom
name=
"torso_geom"
type=
"sphere"
size=
"0.25"
pos=
"0 0 0"
/>
<joint
name=
"root"
type=
"free"
limited=
"false"
pos=
"0 0 0"
axis=
"0 0 1"
margin=
"0.01"
armature=
"0"
damping=
"0"
/>
<body
name=
"front_left_leg"
pos=
"0 0 0"
>
<geom
name=
"aux_1_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 0.2 0.2 0.0"
/>
<body
name=
"aux_1"
pos=
"0.2 0.2 0"
>
<joint
name=
"hip_1"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"0 0 1"
range=
"-30 30"
/>
<geom
name=
"left_leg_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 0.2 0.2 0.0"
/>
<body
pos=
"0.2 0.2 0"
>
<joint
name=
"ankle_1"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"-1 1 0"
range=
"30 70"
/>
<geom
name=
"left_ankle_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 0.4 0.4 0.0"
/>
</body>
</body>
</body>
<body
name=
"front_right_leg"
pos=
"0 0 0"
>
<geom
name=
"aux_2_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 -0.2 0.2 0.0"
/>
<body
name=
"aux_2"
pos=
"-0.2 0.2 0"
>
<joint
name=
"hip_2"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"0 0 1"
range=
"-30 30"
/>
<geom
name=
"right_leg_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 -0.2 0.2 0.0"
/>
<body
pos=
"-0.2 0.2 0"
>
<joint
name=
"ankle_2"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"1 1 0"
range=
"-70 -30"
/>
<geom
name=
"right_ankle_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 -0.4 0.4 0.0"
/>
</body>
</body>
</body>
<body
name=
"back_leg"
pos=
"0 0 0"
>
<geom
name=
"aux_3_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 -0.2 -0.2 0.0"
/>
<body
name=
"aux_3"
pos=
"-0.2 -0.2 0"
>
<joint
name=
"hip_3"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"0 0 1"
range=
"-30 30"
/>
<geom
name=
"back_leg_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 -0.2 -0.2 0.0"
/>
<body
pos=
"-0.2 -0.2 0"
>
<joint
name=
"ankle_3"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"-1 1 0"
range=
"-70 -30"
/>
<geom
name=
"third_ankle_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 -0.4 -0.4 0.0"
/>
</body>
</body>
</body>
<body
name=
"right_back_leg"
pos=
"0 0 0"
>
<geom
name=
"aux_4_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 0.2 -0.2 0.0"
/>
<body
name=
"aux_4"
pos=
"0.2 -0.2 0"
>
<joint
name=
"hip_4"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"0 0 1"
range=
"-30 30"
/>
<geom
name=
"rightback_leg_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 0.2 -0.2 0.0"
/>
<body
pos=
"0.2 -0.2 0"
>
<joint
name=
"ankle_4"
type=
"hinge"
pos=
"0.0 0.0 0.0"
axis=
"1 1 0"
range=
"30 70"
/>
<geom
name=
"fourth_ankle_geom"
type=
"capsule"
size=
"0.08"
fromto=
"0.0 0.0 0.0 0.4 -0.4 0.0"
/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor
joint=
"hip_4"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"ankle_4"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"hip_1"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"ankle_1"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"hip_2"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"ankle_2"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"hip_3"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
<motor
joint=
"ankle_3"
ctrlrange=
"-30.0 30.0"
ctrllimited=
"true"
/>
</actuator>
</mujoco>
research/efficient-hrl/environments/create_maze_env.py
0 → 100755
View file @
d28dee0f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from
ant_maze_env
import
AntMazeEnv
def create_maze_env(env_name=None):
  """Builds the ant maze environment matching `env_name`.

  Args:
    env_name: Environment name. Names starting with 'AntMaze', 'AntPush' or
      'AntFall' select the 'Maze', 'Push' or 'Fall' layout respectively.

  Returns:
    An `AntMazeEnv` constructed with the selected `maze_id`.

  Raises:
    ValueError: If `env_name` is None or starts with none of the known
      prefixes.
  """
  prefix_to_maze_id = (
      ('AntMaze', 'Maze'),
      ('AntPush', 'Push'),
      ('AntFall', 'Fall'),
  )
  # None (the default) has no `.startswith`; report it as an unknown name
  # rather than failing with AttributeError.
  if env_name is not None:
    for prefix, maze_id in prefix_to_maze_id:
      if env_name.startswith(prefix):
        return AntMazeEnv(maze_id=maze_id)
  raise ValueError('Unknown maze environment %s' % env_name)
research/efficient-hrl/environments/maze_env.py
0 → 100755
View file @
d28dee0f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Adapted from rllab maze_env.py."""
import
os
import
tempfile
import
xml.etree.ElementTree
as
ET
import
math
import
numpy
as
np
import
gym
import
maze_env_utils
# Directory that contains mujoco xml files.
MODEL_DIR
=
'environments/assets'
class MazeEnv(gym.Env):
  """Gym environment that places a MuJoCo agent model inside a block maze.

  The constructor parses the agent's MuJoCo XML (`MODEL_CLASS.FILE` under
  `MODEL_DIR`), appends geoms/bodies for the maze cells, writes the result to
  a temporary file and builds `MODEL_CLASS` from it as `self.wrapped_env`.
  Subclasses must set `MODEL_CLASS`.
  """
  # Agent model class; it must expose a `FILE` attribute and accept a
  # `file_path` keyword argument.
  MODEL_CLASS = None

  # Overwritten per instance in __init__.
  MAZE_HEIGHT = None
  MAZE_SIZE_SCALING = None

  def __init__(
      self,
      maze_id=None,
      maze_height=0.5,
      maze_size_scaling=8,
      *args,
      **kwargs):
    """Builds the maze XML and the wrapped agent environment.

    Args:
      maze_id: Layout identifier passed to `maze_env_utils.construct_maze`.
      maze_height: Block height, in units of `maze_size_scaling`.
      maze_size_scaling: Side length of one maze cell.
      *args: Extra positional arguments forwarded to `MODEL_CLASS`.
      **kwargs: Extra keyword arguments forwarded to `MODEL_CLASS`.

    Raises:
      TypeError: If the class does not define `MODEL_CLASS`.
      Exception: If a geom under the torso body has no name.
    """
    self._maze_id = maze_id
    # Step counter; reset() zeroes it again. Initialised here so that
    # _get_obs() (and therefore observation_space) works before the first
    # reset().
    self.t = 0

    model_cls = self.__class__.MODEL_CLASS
    if model_cls is None:
      # Raising a bare string is itself a TypeError; raise that type
      # explicitly so the intended message reaches the caller.
      raise TypeError("MODEL_CLASS unspecified!")
    xml_path = os.path.join(MODEL_DIR, model_cls.FILE)
    tree = ET.parse(xml_path)
    worldbody = tree.find(".//worldbody")

    self.MAZE_HEIGHT = height = maze_height
    self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
    self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze(
        maze_id=self._maze_id)
    # Elevate the maze to allow for falling.
    self.elevated = any(-1 in row for row in structure)
    # Are there any movable blocks?
    self.blocks = any(
        any(maze_env_utils.can_move(r) for r in row)
        for row in structure)

    torso_x, torso_y = self._find_robot()
    self._init_torso_x = torso_x
    self._init_torso_y = torso_y

    height_offset = 0.
    if self.elevated:
      # Increase initial z-pos of ant.
      height_offset = height * size_scaling
      torso = tree.find(".//body[@name='torso']")
      torso.set('pos', '0 0 %.2f' % (0.75 + height_offset))
    if self.blocks:
      # If there are movable blocks, change simulation settings to perform
      # better contact detection.
      default = tree.find(".//default")
      default.find('.//geom').set('solimp', '.995 .995 .01')

    # Half extents shared by every box added below.
    half_width = 0.5 * size_scaling
    half_height = height / 2 * size_scaling

    for i, row in enumerate(structure):
      for j, cell in enumerate(row):
        # Offset all coordinates so that robot starts at the origin.
        x = j * size_scaling - torso_x
        y = i * size_scaling - torso_y

        if self.elevated and cell != -1:
          # Create elevated platform.
          ET.SubElement(
              worldbody, "geom",
              name="elevated_%d_%d" % (i, j),
              pos="%f %f %f" % (x, y, half_height),
              size="%f %f %f" % (half_width, half_width, half_height),
              type="box",
              material="",
              contype="1",
              conaffinity="1",
              rgba="0.9 0.9 0.9 1",
          )

        if cell == 1:
          # Unmovable block.
          ET.SubElement(
              worldbody, "geom",
              name="block_%d_%d" % (i, j),
              pos="%f %f %f" % (x, y, height_offset + half_height),
              size="%f %f %f" % (half_width, half_width, half_height),
              type="box",
              material="",
              contype="1",
              conaffinity="1",
              rgba="0.4 0.4 0.4 1",
          )
        elif maze_env_utils.can_move(cell):
          # Movable block.
          # The "falling" blocks are shrunk slightly and increased in mass to
          # ensure that it can fall easily through a gap in the platform
          # blocks.
          falling = maze_env_utils.can_move_z(cell)
          shrink = 0.99 if falling else 1.0
          moveable_body = ET.SubElement(
              worldbody, "body",
              name="moveable_%d_%d" % (i, j),
              pos="%f %f %f" % (x, y, height_offset + half_height),
          )
          ET.SubElement(
              moveable_body, "geom",
              name="block_%d_%d" % (i, j),
              pos="0 0 0",
              size="%f %f %f" % (half_width * shrink,
                                 half_width * shrink,
                                 half_height),
              type="box",
              material="",
              mass="0.001" if falling else "0.0002",
              contype="1",
              conaffinity="1",
              rgba="0.9 0.1 0.1 1"
          )
          # One slide joint per axis the block is allowed to move along.
          if maze_env_utils.can_move_x(cell):
            ET.SubElement(
                moveable_body, "joint",
                armature="0",
                axis="1 0 0",
                damping="0.0",
                limited="true" if falling else "false",
                range="%f %f" % (-size_scaling, size_scaling),
                margin="0.01",
                name="moveable_x_%d_%d" % (i, j),
                pos="0 0 0",
                type="slide"
            )
          if maze_env_utils.can_move_y(cell):
            ET.SubElement(
                moveable_body, "joint",
                armature="0",
                axis="0 1 0",
                damping="0.0",
                limited="true" if falling else "false",
                range="%f %f" % (-size_scaling, size_scaling),
                margin="0.01",
                name="moveable_y_%d_%d" % (i, j),
                pos="0 0 0",
                type="slide"
            )
          if maze_env_utils.can_move_z(cell):
            ET.SubElement(
                moveable_body, "joint",
                armature="0",
                axis="0 0 1",
                damping="0.0",
                limited="true",
                range="%f 0" % (-height_offset),
                margin="0.01",
                name="moveable_z_%d_%d" % (i, j),
                pos="0 0 0",
                type="slide"
            )

    torso = tree.find(".//body[@name='torso']")
    geoms = torso.findall(".//geom")
    for geom in geoms:
      if 'name' not in geom.attrib:
        raise Exception("Every geom of the torso must have a name "
                        "defined")

    # mkstemp returns an already-open OS-level descriptor; close it so it is
    # not leaked, then let ElementTree write to the path. The file itself is
    # left on disk for the model class to load.
    fd, file_path = tempfile.mkstemp(text=True)
    os.close(fd)
    tree.write(file_path)

    self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs)

  def _get_obs(self):
    """Returns the wrapped env's observation with a scaled step count."""
    return np.concatenate([self.wrapped_env._get_obs(),
                           [self.t * 0.001]])

  def reset(self):
    """Zeroes the step counter, resets the wrapped env, returns the obs."""
    self.t = 0
    self.wrapped_env.reset()
    return self._get_obs()

  @property
  def viewer(self):
    """Viewer of the wrapped environment."""
    return self.wrapped_env.viewer

  def render(self, *args, **kwargs):
    """Forwards rendering to the wrapped environment."""
    return self.wrapped_env.render(*args, **kwargs)

  @property
  def observation_space(self):
    """Unbounded Box with the shape of the current observation."""
    shape = self._get_obs().shape
    high = np.inf * np.ones(shape)
    low = -high
    return gym.spaces.Box(low, high)

  @property
  def action_space(self):
    """Action space of the wrapped environment."""
    return self.wrapped_env.action_space

  def _find_robot(self):
    """Returns the scaled (x, y) of the cell marked 'r' in the maze.

    Raises:
      AssertionError: If no cell is marked 'r'.
    """
    structure = self.MAZE_STRUCTURE
    size_scaling = self.MAZE_SIZE_SCALING
    for i, row in enumerate(structure):
      for j, cell in enumerate(row):
        if cell == 'r':
          return j * size_scaling, i * size_scaling
    assert False, 'No robot in maze specification.'

  def step(self, action):
    """Steps the wrapped env; returns (obs, inner reward, False, info)."""
    self.t += 1
    # The wrapped env's observation and done flag are not used: the
    # observation is rebuilt with the step count and done is always False.
    _, inner_reward, _, info = self.wrapped_env.step(action)
    next_obs = self._get_obs()
    done = False
    return next_obs, inner_reward, done, info
research/efficient-hrl/environments/maze_env_utils.py
0 → 100755
View file @
d28dee0f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Adapted from rllab maze_env_utils.py."""
import
numpy
as
np
import
math
class Move(object):
  """Cell codes naming the axes along which a maze block may move."""
  # Single-axis codes.
  X = 11
  Y = 12
  Z = 13
  # Two-axis codes.
  XY = 14
  XZ = 15
  YZ = 16
  # All three axes.
  XYZ = 17
def can_move_x(movable):
  """Returns True if `movable` is a `Move` code that includes the x axis."""
  x_codes = (Move.X, Move.XY, Move.XZ, Move.XYZ)
  return movable in x_codes
def can_move_y(movable):
  """Returns True if `movable` is a `Move` code that includes the y axis."""
  y_codes = (Move.Y, Move.XY, Move.YZ, Move.XYZ)
  return movable in y_codes
def can_move_z(movable):
  """Returns True if `movable` is a `Move` code that includes the z axis."""
  z_codes = (Move.Z, Move.XZ, Move.YZ, Move.XYZ)
  return movable in z_codes
def can_move(movable):
  """Returns True if `movable` can move along at least one axis."""
  axis_checks = (can_move_x, can_move_y, can_move_z)
  return any(check(movable) for check in axis_checks)
def construct_maze(maze_id='Maze'):
  """Returns the grid layout for the requested maze.

  Each layout is a list of rows. The cell values used are 1, 0, -1, 'r'
  (exactly one per layout) and `Move` codes.

  Args:
    maze_id: One of 'Maze', 'Push' or 'Fall'.

  Returns:
    A freshly built list of lists describing the maze.

  Raises:
    NotImplementedError: If `maze_id` is not one of the known layouts.
  """
  if maze_id == 'Maze':
    return [
        [1, 1, 1, 1, 1],
        [1, 'r', 0, 0, 1],
        [1, 1, 1, 0, 1],
        [1, 0, 0, 0, 1],
        [1, 1, 1, 1, 1],
    ]

  if maze_id == 'Push':
    return [
        [1, 1, 1, 1, 1],
        [1, 0, 'r', 1, 1],
        [1, 0, Move.XY, 0, 1],
        [1, 1, 0, 1, 1],
        [1, 1, 1, 1, 1],
    ]

  if maze_id == 'Fall':
    return [
        [1, 1, 1, 1],
        [1, 'r', 0, 1],
        [1, 0, Move.YZ, 1],
        [1, -1, -1, 1],
        [1, 0, 0, 1],
        [1, 1, 1, 1],
    ]

  raise NotImplementedError(
      'The provided MazeId %s is not recognized' % maze_id)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment