ModelZoo / ResNet50_tensorflow · Commits

Commit 65da497f, authored Dec 13, 2018 by Shining Sun

    Merge branch 'master' of https://github.com/tensorflow/models into cifar_keras

Parents: 93e0022d, 7d032ea3

Changes: 186 files in the commit; showing 20 changed files with 2638 additions and 32 deletions (+2638 / -32).
Changed files shown in this diff:

    research/efficient-hrl/environments/maze_env.py                         +290  -25
    research/efficient-hrl/environments/maze_env_utils.py                   +90   -2
    research/efficient-hrl/environments/point.py                            +90   -0
    research/efficient-hrl/environments/point_maze_env.py                   +8    -1
    research/efficient-hrl/eval.py                                          +460  -0
    research/efficient-hrl/run_env.py                                       +129  -0
    research/efficient-hrl/run_eval.py                                      +51   -0
    research/efficient-hrl/run_train.py                                     +49   -0
    research/efficient-hrl/scripts/local_eval.py                            +76   -0
    research/efficient-hrl/scripts/local_train.py                           +76   -0
    research/efficient-hrl/train.py                                         +670  -0
    research/efficient-hrl/train_utils.py                                   +175  -0
    research/efficient-hrl/utils/__init__.py                                +1    -0
    research/efficient-hrl/utils/eval_utils.py                              +151  -0
    research/efficient-hrl/utils/utils.py                                   +318  -0
    research/lstm_object_detection/README                                   +1    -1
    research/lstm_object_detection/eval.py                                  +1    -1
    research/lstm_object_detection/inputs/seq_dataset_builder.py            +1    -1
    research/lstm_object_detection/inputs/seq_dataset_builder_test.py       +1    -1
    research/lstm_object_detection/inputs/tf_sequence_example_decoder.py    +0    -0
research/efficient-hrl/environments/maze_env.py  (file mode 100755 → 100644)

@@ -22,7 +22,7 @@ import math
import numpy as np
import gym

-import maze_env_utils
+from environments import maze_env_utils

# Directory that contains mujoco xml files.
MODEL_DIR = 'environments/assets'

@@ -39,6 +39,13 @@ class MazeEnv(gym.Env):
               maze_id=None,
               maze_height=0.5,
               maze_size_scaling=8,
               n_bins=0,
               sensor_range=3.,
               sensor_span=2 * math.pi,
               observe_blocks=False,
               put_spin_near_agent=False,
               top_down_view=False,
               manual_collision=False,
               *args,
               **kwargs):
    self._maze_id = maze_id

@@ -52,6 +59,14 @@ class MazeEnv(gym.Env):
    self.MAZE_HEIGHT = height = maze_height
    self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
    self._n_bins = n_bins
    self._sensor_range = sensor_range * size_scaling
    self._sensor_span = sensor_span
    self._observe_blocks = observe_blocks
    self._put_spin_near_agent = put_spin_near_agent
    self._top_down_view = top_down_view
    self._manual_collision = manual_collision

    self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze(
        maze_id=self._maze_id)
    self.elevated = any(-1 in row
                        for row in structure)  # Elevate the maze to allow for falling.
    self.blocks = any(

@@ -61,6 +76,13 @@ class MazeEnv(gym.Env):
    torso_x, torso_y = self._find_robot()
    self._init_torso_x = torso_x
    self._init_torso_y = torso_y
    self._init_positions = [
        (x - torso_x, y - torso_y)
        for x, y in self._find_all_robots()]

    self._xy_to_rowcol = lambda x, y: (2 + (y + size_scaling / 2) / size_scaling,
                                       2 + (x + size_scaling / 2) / size_scaling)
    self._view = np.zeros([5, 5, 3])  # walls (immovable), chasms (fall), movable blocks

    height_offset = 0.
    if self.elevated:

@@ -74,9 +96,13 @@ class MazeEnv(gym.Env):
    default = tree.find(".//default")
    default.find('.//geom').set('solimp', '.995 .995 .01')

    self.movable_blocks = []
    for i in range(len(structure)):
      for j in range(len(structure[0])):
-       if self.elevated and structure[i][j] not in [-1]:
+       struct = structure[i][j]
+       if struct == 'r' and self._put_spin_near_agent:
+         struct = maze_env_utils.Move.SpinXY
+       if self.elevated and struct not in [-1]:
          # Create elevated platform.
          ET.SubElement(
              worldbody, "geom",

@@ -93,7 +119,7 @@ class MazeEnv(gym.Env):
              conaffinity="1",
              rgba="0.9 0.9 0.9 1",
          )
-       if structure[i][j] == 1:  # Unmovable block.
+       if struct == 1:  # Unmovable block.
          # Offset all coordinates so that robot starts at the origin.
          ET.SubElement(
              worldbody, "geom",

@@ -111,26 +137,32 @@ class MazeEnv(gym.Env):
              conaffinity="1",
              rgba="0.4 0.4 0.4 1",
          )
-       elif maze_env_utils.can_move(structure[i][j]):  # Movable block.
+       elif maze_env_utils.can_move(struct):  # Movable block.
          # The "falling" blocks are shrunk slightly and increased in mass to
          # ensure that it can fall easily through a gap in the platform blocks.
-         falling = maze_env_utils.can_move_z(structure[i][j])
-         shrink = 0.99 if falling else 1.0
-         moveable_body = ET.SubElement(
+         name = "movable_%d_%d" % (i, j)
+         self.movable_blocks.append((name, struct))
+         falling = maze_env_utils.can_move_z(struct)
+         spinning = maze_env_utils.can_spin(struct)
+         x_offset = 0.25 * size_scaling if spinning else 0.0
+         y_offset = 0.0
+         shrink = 0.1 if spinning else 0.99 if falling else 1.0
+         height_shrink = 0.1 if spinning else 1.0
+         movable_body = ET.SubElement(
              worldbody, "body",
-             name="moveable_%d_%d" % (i, j),
-             pos="%f %f %f" % (j * size_scaling - torso_x,
-                               i * size_scaling - torso_y,
-                               height_offset + height / 2 * size_scaling),
+             name=name,
+             pos="%f %f %f" % (j * size_scaling - torso_x + x_offset,
+                               i * size_scaling - torso_y + y_offset,
+                               height_offset + height / 2 * size_scaling * height_shrink),
          )
          ET.SubElement(
-             moveable_body, "geom",
+             movable_body, "geom",
              name="block_%d_%d" % (i, j),
              pos="0 0 0",
              size="%f %f %f" % (0.5 * size_scaling * shrink,
                                 0.5 * size_scaling * shrink,
-                                height / 2 * size_scaling),
+                                height / 2 * size_scaling * height_shrink),
              type="box",
              material="",
              mass="0.001" if falling else "0.0002",

@@ -138,45 +170,56 @@ class MazeEnv(gym.Env):
              conaffinity="1",
              rgba="0.9 0.1 0.1 1"
          )
-         if maze_env_utils.can_move_x(structure[i][j]):
+         if maze_env_utils.can_move_x(struct):
            ET.SubElement(
-               moveable_body, "joint",
+               movable_body, "joint",
                armature="0",
                axis="1 0 0",
                damping="0.0",
                limited="true" if falling else "false",
                range="%f %f" % (-size_scaling, size_scaling),
                margin="0.01",
-               name="moveable_x_%d_%d" % (i, j),
+               name="movable_x_%d_%d" % (i, j),
                pos="0 0 0",
                type="slide"
            )
-         if maze_env_utils.can_move_y(structure[i][j]):
+         if maze_env_utils.can_move_y(struct):
            ET.SubElement(
-               moveable_body, "joint",
+               movable_body, "joint",
                armature="0",
                axis="0 1 0",
                damping="0.0",
                limited="true" if falling else "false",
                range="%f %f" % (-size_scaling, size_scaling),
                margin="0.01",
-               name="moveable_y_%d_%d" % (i, j),
+               name="movable_y_%d_%d" % (i, j),
                pos="0 0 0",
                type="slide"
            )
-         if maze_env_utils.can_move_z(structure[i][j]):
+         if maze_env_utils.can_move_z(struct):
            ET.SubElement(
-               moveable_body, "joint",
+               movable_body, "joint",
                armature="0",
                axis="0 0 1",
                damping="0.0",
                limited="true",
                range="%f 0" % (-height_offset),
                margin="0.01",
-               name="moveable_z_%d_%d" % (i, j),
+               name="movable_z_%d_%d" % (i, j),
                pos="0 0 0",
                type="slide"
            )
+         if maze_env_utils.can_spin(struct):
+           ET.SubElement(
+               movable_body, "joint",
+               armature="0",
+               axis="0 0 1",
+               damping="0.0",
+               limited="false",
+               name="spinable_%d_%d" % (i, j),
+               pos="0 0 0",
+               type="ball"
+           )

    torso = tree.find(".//body[@name='torso']")
    geoms = torso.findall(".//geom")

@@ -190,13 +233,203 @@ class MazeEnv(gym.Env):
    self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs)

  def get_ori(self):
    return self.wrapped_env.get_ori()

  def get_top_down_view(self):
    self._view = np.zeros_like(self._view)

    def valid(row, col):
      return self._view.shape[0] > row >= 0 and self._view.shape[1] > col >= 0

    def update_view(x, y, d, row=None, col=None):
      if row is None or col is None:
        x = x - self._robot_x
        y = y - self._robot_y
        th = self._robot_ori

        row, col = self._xy_to_rowcol(x, y)
        update_view(x, y, d, row=row, col=col)
        return

      row, row_frac, col, col_frac = int(row), row % 1, int(col), col % 1
      if row_frac < 0:
        row_frac += 1
      if col_frac < 0:
        col_frac += 1

      if valid(row, col):
        self._view[row, col, d] += (
            (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
            (min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
      if valid(row - 1, col):
        self._view[row - 1, col, d] += (
            (max(0., 0.5 - row_frac)) *
            (min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
      if valid(row + 1, col):
        self._view[row + 1, col, d] += (
            (max(0., row_frac - 0.5)) *
            (min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
      if valid(row, col - 1):
        self._view[row, col - 1, d] += (
            (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
            (max(0., 0.5 - col_frac)))
      if valid(row, col + 1):
        self._view[row, col + 1, d] += (
            (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
            (max(0., col_frac - 0.5)))
      if valid(row - 1, col - 1):
        self._view[row - 1, col - 1, d] += (
            (max(0., 0.5 - row_frac)) * max(0., 0.5 - col_frac))
      if valid(row - 1, col + 1):
        self._view[row - 1, col + 1, d] += (
            (max(0., 0.5 - row_frac)) * max(0., col_frac - 0.5))
      if valid(row + 1, col + 1):
        self._view[row + 1, col + 1, d] += (
            (max(0., row_frac - 0.5)) * max(0., col_frac - 0.5))
      if valid(row + 1, col - 1):
        self._view[row + 1, col - 1, d] += (
            (max(0., row_frac - 0.5)) * max(0., 0.5 - col_frac))

    # Draw ant.
    robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
    self._robot_x = robot_x
    self._robot_y = robot_y
    self._robot_ori = self.get_ori()

    structure = self.MAZE_STRUCTURE
    size_scaling = self.MAZE_SIZE_SCALING
    height = self.MAZE_HEIGHT

    # Draw immovable blocks and chasms.
    for i in range(len(structure)):
      for j in range(len(structure[0])):
        if structure[i][j] == 1:  # Wall.
          update_view(j * size_scaling - self._init_torso_x,
                      i * size_scaling - self._init_torso_y,
                      0)
        if structure[i][j] == -1:  # Chasm.
          update_view(j * size_scaling - self._init_torso_x,
                      i * size_scaling - self._init_torso_y,
                      1)

    # Draw movable blocks.
    for block_name, block_type in self.movable_blocks:
      block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2]
      update_view(block_x, block_y, 2)

    return self._view

  def get_range_sensor_obs(self):
    """Returns egocentric range sensor observations of maze."""
    robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3]
    ori = self.get_ori()

    structure = self.MAZE_STRUCTURE
    size_scaling = self.MAZE_SIZE_SCALING
    height = self.MAZE_HEIGHT

    segments = []
    # Get line segments (corresponding to outer boundary) of each immovable
    # block or drop-off.
    for i in range(len(structure)):
      for j in range(len(structure[0])):
        if structure[i][j] in [1, -1]:  # There's a wall or drop-off.
          cx = j * size_scaling - self._init_torso_x
          cy = i * size_scaling - self._init_torso_y
          x1 = cx - 0.5 * size_scaling
          x2 = cx + 0.5 * size_scaling
          y1 = cy - 0.5 * size_scaling
          y2 = cy + 0.5 * size_scaling
          struct_segments = [
              ((x1, y1), (x2, y1)),
              ((x2, y1), (x2, y2)),
              ((x2, y2), (x1, y2)),
              ((x1, y2), (x1, y1)),
          ]
          for seg in struct_segments:
            segments.append(dict(
                segment=seg,
                type=structure[i][j],
            ))
    # Get line segments (corresponding to outer boundary) of each movable
    # block within the agent's z-view.
    for block_name, block_type in self.movable_blocks:
      block_x, block_y, block_z = self.wrapped_env.get_body_com(block_name)[:3]
      if (block_z + height * size_scaling / 2 >= robot_z and
          robot_z >= block_z - height * size_scaling / 2):  # Block in view.
        x1 = block_x - 0.5 * size_scaling
        x2 = block_x + 0.5 * size_scaling
        y1 = block_y - 0.5 * size_scaling
        y2 = block_y + 0.5 * size_scaling
        struct_segments = [
            ((x1, y1), (x2, y1)),
            ((x2, y1), (x2, y2)),
            ((x2, y2), (x1, y2)),
            ((x1, y2), (x1, y1)),
        ]
        for seg in struct_segments:
          segments.append(dict(
              segment=seg,
              type=block_type,
          ))

    sensor_readings = np.zeros((self._n_bins, 3))  # 3 for wall, drop-off, block
    for ray_idx in range(self._n_bins):
      ray_ori = (ori - self._sensor_span * 0.5 +
                 (2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span)
      ray_segments = []
      # Get all segments that intersect with ray.
      for seg in segments:
        p = maze_env_utils.ray_segment_intersect(
            ray=((robot_x, robot_y), ray_ori),
            segment=seg["segment"])
        if p is not None:
          ray_segments.append(dict(
              segment=seg["segment"],
              type=seg["type"],
              ray_ori=ray_ori,
              distance=maze_env_utils.point_distance(p, (robot_x, robot_y)),
          ))
      if len(ray_segments) > 0:
        # Find out which segment is intersected first.
        first_seg = sorted(ray_segments, key=lambda x: x["distance"])[0]
        seg_type = first_seg["type"]
        idx = (0 if seg_type == 1 else  # Wall.
               1 if seg_type == -1 else  # Drop-off.
               2 if maze_env_utils.can_move(seg_type) else  # Block.
               None)
        if first_seg["distance"] <= self._sensor_range:
          sensor_readings[ray_idx][idx] = (
              self._sensor_range - first_seg["distance"]) / self._sensor_range

    return sensor_readings

  def _get_obs(self):
-   return np.concatenate([self.wrapped_env._get_obs(),
-                          [self.t * 0.001]])
+   wrapped_obs = self.wrapped_env._get_obs()
+   if self._top_down_view:
+     view = [self.get_top_down_view().flat]
+   else:
+     view = []
+
+   if self._observe_blocks:
+     additional_obs = []
+     for block_name, block_type in self.movable_blocks:
+       additional_obs.append(self.wrapped_env.get_body_com(block_name))
+     wrapped_obs = np.concatenate([wrapped_obs[:3]] + additional_obs +
+                                  [wrapped_obs[3:]])
+
+   range_sensor_obs = self.get_range_sensor_obs()
+   return np.concatenate([wrapped_obs,
+                          range_sensor_obs.flat] +
+                         view + [[self.t * 0.001]])

  def reset(self):
    self.t = 0
    self.trajectory = []
    self.wrapped_env.reset()
    if len(self._init_positions) > 1:
      xy = random.choice(self._init_positions)
      self.wrapped_env.set_xy(xy)
    return self._get_obs()

  @property

@@ -226,9 +459,41 @@ class MazeEnv(gym.Env):
          return j * size_scaling, i * size_scaling
    assert False, 'No robot in maze specification.'

  def _find_all_robots(self):
    structure = self.MAZE_STRUCTURE
    size_scaling = self.MAZE_SIZE_SCALING
    coords = []
    for i in range(len(structure)):
      for j in range(len(structure[0])):
        if structure[i][j] == 'r':
          coords.append((j * size_scaling, i * size_scaling))
    return coords

  def _is_in_collision(self, pos):
    x, y = pos
    structure = self.MAZE_STRUCTURE
    size_scaling = self.MAZE_SIZE_SCALING
    for i in range(len(structure)):
      for j in range(len(structure[0])):
        if structure[i][j] == 1:
          minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x
          maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x
          miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y
          maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y
          if minx <= x <= maxx and miny <= y <= maxy:
            return True
    return False

  def step(self, action):
    self.t += 1
-   inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
+   if self._manual_collision:
+     old_pos = self.wrapped_env.get_xy()
+     inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
+     new_pos = self.wrapped_env.get_xy()
+     if self._is_in_collision(new_pos):
+       self.wrapped_env.set_xy(old_pos)
+   else:
+     inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
    next_obs = self._get_obs()
    done = False
    return next_obs, inner_reward, done, info
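As an aside on the new get_top_down_view() method: update_view() spreads each drawn point over the grid cells adjacent to its fractional (row, col) position, so walls, chasms, and movable blocks show up with sub-cell precision in the 5x5x3 view. The snippet below is an illustrative sketch, not part of the commit; it reproduces that weighting for a single point with plain NumPy (no MuJoCo) so the weights can be inspected directly.

    import numpy as np

    # A point at fractional grid coordinates (2.7, 1.8) distributes unit weight
    # over the neighbouring cells, mirroring the min/max expressions above.
    view = np.zeros([5, 5])
    row, col = 2.7, 1.8
    r, rf = int(row), row % 1
    c, cf = int(col), col % 1

    row_span = min(1., rf + 0.5) - max(0., rf - 0.5)   # overlap with row r
    col_span = min(1., cf + 0.5) - max(0., cf - 0.5)   # overlap with column c
    view[r, c] += row_span * col_span
    view[r + 1, c] += max(0., rf - 0.5) * col_span
    view[r, c + 1] += row_span * max(0., cf - 0.5)
    view[r + 1, c + 1] += max(0., rf - 0.5) * max(0., cf - 0.5)

    print(view[2:4, 1:3])   # [[0.56 0.24], [0.14 0.06]] -- the weights sum to 1.0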
research/efficient-hrl/environments/maze_env_utils.py  (file mode 100755 → 100644)

@@ -26,20 +26,27 @@ class Move(object):
  XZ = 15
  YZ = 16
  XYZ = 17
  SpinXY = 18


def can_move_x(movable):
- return movable in [Move.X, Move.XY, Move.XZ, Move.XYZ]
+ return movable in [Move.X, Move.XY, Move.XZ, Move.XYZ,
+                    Move.SpinXY]


def can_move_y(movable):
- return movable in [Move.Y, Move.XY, Move.YZ, Move.XYZ]
+ return movable in [Move.Y, Move.XY, Move.YZ, Move.XYZ,
+                    Move.SpinXY]


def can_move_z(movable):
  return movable in [Move.Z, Move.XZ, Move.YZ, Move.XYZ]


def can_spin(movable):
  return movable in [Move.SpinXY]


def can_move(movable):
  return can_move_x(movable) or can_move_y(movable) or can_move_z(movable)

@@ -70,7 +77,88 @@ def construct_maze(maze_id='Maze'):
        [1, 0, 0, 1],
        [1, 1, 1, 1],
    ]
  elif maze_id == 'Block':
    O = 'r'
    structure = [
        [1, 1, 1, 1, 1],
        [1, O, 0, 0, 1],
        [1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1],
        [1, 1, 1, 1, 1],
    ]
  elif maze_id == 'BlockMaze':
    O = 'r'
    structure = [
        [1, 1, 1, 1],
        [1, O, 0, 1],
        [1, 1, 0, 1],
        [1, 0, 0, 1],
        [1, 1, 1, 1],
    ]
  else:
    raise NotImplementedError('The provided MazeId %s is not recognized' % maze_id)

  return structure


def line_intersect(pt1, pt2, ptA, ptB):
  """
  Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html

  this returns the intersection of Line(pt1,pt2) and Line(ptA,ptB)
  """

  DET_TOLERANCE = 0.00000001

  # the first line is pt1 + r*(pt2-pt1)
  # in component form:
  x1, y1 = pt1
  x2, y2 = pt2
  dx1 = x2 - x1
  dy1 = y2 - y1

  # the second line is ptA + s*(ptB-ptA)
  x, y = ptA
  xB, yB = ptB
  dx = xB - x
  dy = yB - y

  DET = (-dx1 * dy + dy1 * dx)

  if math.fabs(DET) < DET_TOLERANCE:
    return (0, 0, 0, 0, 0)

  # now, the determinant should be OK
  DETinv = 1.0 / DET

  # find the scalar amount along the "self" segment
  r = DETinv * (-dy * (x - x1) + dx * (y - y1))

  # find the scalar amount along the input line
  s = DETinv * (-dy1 * (x - x1) + dx1 * (y - y1))

  # return the average of the two descriptions
  xi = (x1 + r * dx1 + x + s * dx) / 2.0
  yi = (y1 + r * dy1 + y + s * dy) / 2.0
  return (xi, yi, 1, r, s)


def ray_segment_intersect(ray, segment):
  """
  Check if the ray originated from (x, y) with direction theta intersects the
  line segment (x1, y1) -- (x2, y2), and return the intersection point if there
  is one
  """
  (x, y), theta = ray
  # (x1, y1), (x2, y2) = segment
  pt1 = (x, y)
  len = 1
  pt2 = (x + len * math.cos(theta), y + len * math.sin(theta))
  xo, yo, valid, r, s = line_intersect(pt1, pt2, *segment)
  if valid and r >= 0 and 0 <= s <= 1:
    return (xo, yo)
  return None


def point_distance(p1, p2):
  x1, y1 = p1
  x2, y2 = p2
  return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
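A quick way to exercise these helpers (an illustrative snippet, not part of the commit, assuming the environments package from this diff is on the Python path): SpinXY now counts as movable in x and y but not z, and the ray/segment helpers behave as expected on a simple axis-aligned case.

    from environments import maze_env_utils
    from environments.maze_env_utils import Move

    # SpinXY blocks are treated as movable in x and y, and as spinnable.
    print(maze_env_utils.can_move_x(Move.SpinXY))   # True
    print(maze_env_utils.can_move_y(Move.SpinXY))   # True
    print(maze_env_utils.can_move_z(Move.SpinXY))   # False
    print(maze_env_utils.can_spin(Move.SpinXY))     # True

    # A ray from the origin pointing along +x (theta = 0) hits a vertical
    # segment at x = 2 at the point (2, 0), two units away.
    hit = maze_env_utils.ray_segment_intersect(
        ray=((0., 0.), 0.), segment=((2., -1.), (2., 1.)))
    print(hit)                                            # (2.0, 0.0)
    print(maze_env_utils.point_distance((0., 0.), hit))   # 2.0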
research/efficient-hrl/environments/point.py  (new file, 0 → 100644)

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Wrapper for creating the ant environment in gym_mujoco."""

import math
import numpy as np

from gym import utils
from gym.envs.mujoco import mujoco_env


class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle):
  FILE = "point.xml"
  ORI_IND = 2

  def __init__(self, file_path=None, expose_all_qpos=True):
    self._expose_all_qpos = expose_all_qpos

    mujoco_env.MujocoEnv.__init__(self, file_path, 1)
    utils.EzPickle.__init__(self)

  @property
  def physics(self):
    return self.model

  def _step(self, a):
    return self.step(a)

  def step(self, action):
    action[0] = 0.2 * action[0]
    qpos = np.copy(self.physics.data.qpos)
    qpos[2] += action[1]
    ori = qpos[2]
    # compute increment in each direction
    dx = math.cos(ori) * action[0]
    dy = math.sin(ori) * action[0]
    # ensure that the robot is within reasonable range
    qpos[0] = np.clip(qpos[0] + dx, -100, 100)
    qpos[1] = np.clip(qpos[1] + dy, -100, 100)
    qvel = self.physics.data.qvel
    self.set_state(qpos, qvel)
    for _ in range(0, self.frame_skip):
      self.physics.step()
    next_obs = self._get_obs()
    reward = 0
    done = False
    info = {}
    return next_obs, reward, done, info

  def _get_obs(self):
    if self._expose_all_qpos:
      return np.concatenate([
          self.physics.data.qpos.flat[:3],  # Only point-relevant coords.
          self.physics.data.qvel.flat[:3]])
    return np.concatenate([
        self.physics.data.qpos.flat[2:3],
        self.physics.data.qvel.flat[:3]])

  def reset_model(self):
    qpos = self.init_qpos + self.np_random.uniform(
        size=self.physics.model.nq, low=-.1, high=.1)
    qvel = self.init_qvel + self.np_random.randn(self.physics.model.nv) * .1

    # Set everything other than point to original position and 0 velocity.
    qpos[3:] = self.init_qpos[3:]
    qvel[3:] = 0.
    self.set_state(qpos, qvel)
    return self._get_obs()

  def get_ori(self):
    return self.model.data.qpos[self.__class__.ORI_IND]

  def set_xy(self, xy):
    qpos = np.copy(self.physics.data.qpos)
    qpos[0] = xy[0]
    qpos[1] = xy[1]

    qvel = self.physics.data.qvel
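For intuition about the action convention in PointEnv.step (action[0] is a forward speed scaled by 0.2, action[1] a change of heading), here is a MuJoCo-free sketch, not part of the commit, that applies the same qpos update:

    import math
    import numpy as np

    # Illustrative only: the kinematic update PointEnv.step applies to qpos,
    # reproduced without MuJoCo. qpos = [x, y, heading].
    def point_step(qpos, action):
      qpos = np.copy(qpos)
      forward = 0.2 * action[0]
      qpos[2] += action[1]                 # turn
      ori = qpos[2]
      qpos[0] = np.clip(qpos[0] + math.cos(ori) * forward, -100, 100)
      qpos[1] = np.clip(qpos[1] + math.sin(ori) * forward, -100, 100)
      return qpos

    print(point_step(np.zeros(3), np.array([1.0, math.pi / 2])))
    # ~[0.0, 0.2, 1.5708]: the point turns 90 degrees, then moves 0.2 along +y.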
research/astronet/astronet/astro_fc_model/__init__.py → research/efficient-hrl/environments/point_maze_env.py  (renamed)

-# Copyright 2018 The TensorFlow Authors.
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -11,4 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from environments.maze_env import MazeEnv
from environments.point import PointEnv


class PointMazeEnv(MazeEnv):
  MODEL_CLASS = PointEnv
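Putting the pieces together, PointMazeEnv is just MazeEnv specialized with PointEnv as the model class, so the keyword arguments added to MazeEnv in this commit pass straight through. A minimal sketch (not from the commit; it assumes MuJoCo is installed and that the environments package and its assets directory are importable from the working directory):

    import numpy as np
    from environments.point_maze_env import PointMazeEnv

    env = PointMazeEnv(
        maze_id='Maze',            # one of the layouts in construct_maze()
        maze_size_scaling=8,
        n_bins=8,                  # enable the egocentric range sensor
        observe_blocks=True,       # append movable-block positions to the obs
        top_down_view=True,        # append the 5x5x3 top-down occupancy view
        manual_collision=True)
    obs = env.reset()
    # PointEnv actions are [forward speed, turn rate].
    obs, reward, done, info = env.step(np.array([1.0, 0.1]))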
research/efficient-hrl/eval.py  (new file, 0 → 100644)

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

r"""Script for evaluating a UVF agent.

To run locally: See run_eval.py

To run on borg: See train_eval.borg
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf
slim = tf.contrib.slim

import gin.tf
# pylint: disable=unused-import
import agent
import train
from utils import utils as uvf_utils
from utils import eval_utils
from environments import create_maze_env
# pylint: enable=unused-import

flags = tf.app.flags

flags.DEFINE_string('eval_dir', None,
                    'Directory for writing logs/summaries during eval.')
flags.DEFINE_string('checkpoint_dir', None,
                    'Directory containing checkpoints to eval.')
FLAGS = flags.FLAGS


def get_evaluate_checkpoint_fn(master, output_dir, eval_step_fns,
                               model_rollout_fn, gamma, max_steps_per_episode,
                               num_episodes_eval, num_episodes_videos,
                               tuner_hook, generate_videos,
                               generate_summaries, video_settings):
  """Returns a function that evaluates a given checkpoint.

  Args:
    master: BNS name of the TensorFlow master
    output_dir: The output directory to which the metric summaries are written.
    eval_step_fns: A dictionary of a functions that return a list of
      [state, action, reward, discount, transition_type] tensors,
      indexed by summary tag name.
    model_rollout_fn: Model rollout fn.
    gamma: Discount factor for the reward.
    max_steps_per_episode: Maximum steps to run each episode for.
    num_episodes_eval: Number of episodes to evaluate and average reward over.
    num_episodes_videos: Number of episodes to record for video.
    tuner_hook: A callable(average reward, global step) that updates a Vizier
      tuner trial.
    generate_videos: Whether to generate videos of the agent in action.
    generate_summaries: Whether to generate summaries.
    video_settings: Settings for generating videos of the agent.

  Returns:
    A function that evaluates a checkpoint.
  """
  sess = tf.Session(master, graph=tf.get_default_graph())
  sess.run(tf.global_variables_initializer())
  sess.run(tf.local_variables_initializer())
  summary_writer = tf.summary.FileWriter(output_dir)

  def evaluate_checkpoint(checkpoint_path):
    """Performs a one-time evaluation of the given checkpoint.

    Args:
      checkpoint_path: Checkpoint to evaluate.
    Returns:
      True if the evaluation process should stop
    """
    restore_fn = tf.contrib.framework.assign_from_checkpoint_fn(
        checkpoint_path,
        uvf_utils.get_all_vars(),
        ignore_missing_vars=True,
        reshape_variables=False)
    assert restore_fn is not None, 'cannot restore %s' % checkpoint_path
    restore_fn(sess)
    global_step = sess.run(slim.get_global_step())
    should_stop = False
    max_reward = -1e10
    max_meta_reward = -1e10

    for eval_tag, (eval_step, env_base,) in sorted(eval_step_fns.items()):
      if hasattr(env_base, 'set_sess'):
        env_base.set_sess(sess)  # set session

      if generate_summaries:
        tf.logging.info(
            '[%s] Computing average reward over %d episodes at global step %d.',
            eval_tag, num_episodes_eval, global_step)
        (average_reward, last_reward,
         average_meta_reward, last_meta_reward, average_success,
         states, actions) = eval_utils.compute_average_reward(
             sess, env_base, eval_step, gamma, max_steps_per_episode,
             num_episodes_eval)
        tf.logging.info('[%s] Average reward = %f', eval_tag, average_reward)
        tf.logging.info('[%s] Last reward = %f', eval_tag, last_reward)
        tf.logging.info('[%s] Average meta reward = %f', eval_tag,
                        average_meta_reward)
        tf.logging.info('[%s] Last meta reward = %f', eval_tag,
                        last_meta_reward)
        tf.logging.info('[%s] Average success = %f', eval_tag, average_success)
        if model_rollout_fn is not None:
          preds, model_losses = eval_utils.compute_model_loss(
              sess, model_rollout_fn, states, actions)
          for i, (pred, state, model_loss) in enumerate(
              zip(preds, states, model_losses)):
            tf.logging.info('[%s] Model rollout step %d: loss=%f', eval_tag, i,
                            model_loss)
            tf.logging.info('[%s] Model rollout step %d: pred=%s', eval_tag, i,
                            str(pred.tolist()))
            tf.logging.info('[%s] Model rollout step %d: state=%s', eval_tag, i,
                            str(state.tolist()))

        # Report the eval stats to the tuner.
        if average_reward > max_reward:
          max_reward = average_reward
        if average_meta_reward > max_meta_reward:
          max_meta_reward = average_meta_reward

        for (tag, value) in [('Reward/average_%s_reward', average_reward),
                             ('Reward/last_%s_reward', last_reward),
                             ('Reward/average_%s_meta_reward', average_meta_reward),
                             ('Reward/last_%s_meta_reward', last_meta_reward),
                             ('Reward/average_%s_success', average_success)]:
          summary_str = tf.Summary(value=[
              tf.Summary.Value(
                  tag=tag % eval_tag,
                  simple_value=value)
          ])
          summary_writer.add_summary(summary_str, global_step)
          summary_writer.flush()

      if generate_videos or should_stop:
        # Do a manual reset before generating the video to see the initial
        # pose of the robot, towards which the reset controller is moving.
        if hasattr(env_base, '_gym_env'):
          tf.logging.info('Resetting before recording video')
          if hasattr(env_base._gym_env, 'reset_model'):
            env_base._gym_env.reset_model()  # pylint: disable=protected-access
          else:
            env_base._gym_env.wrapped_env.reset_model()
        video_filename = os.path.join(output_dir, 'videos',
                                      '%s_step_%d.mp4' % (eval_tag,
                                                          global_step))
        eval_utils.capture_video(sess, eval_step, env_base,
                                 max_steps_per_episode * num_episodes_videos,
                                 video_filename, video_settings,
                                 reset_every=max_steps_per_episode)

      should_stop = should_stop or (generate_summaries and tuner_hook and
                                    tuner_hook(max_reward, global_step))
    return bool(should_stop)

  return evaluate_checkpoint


def get_model_rollout(uvf_agent, tf_env):
  """Model rollout function."""
  state_spec = tf_env.observation_spec()[0]
  action_spec = tf_env.action_spec()[0]
  state_ph = tf.placeholder(dtype=state_spec.dtype, shape=state_spec.shape)
  action_ph = tf.placeholder(dtype=action_spec.dtype, shape=action_spec.shape)

  merged_state = uvf_agent.merged_state(state_ph)
  diff_value = uvf_agent.critic_net(tf.expand_dims(merged_state, 0),
                                    tf.expand_dims(action_ph, 0))[0]
  diff_value = tf.cast(diff_value, dtype=state_ph.dtype)
  state_ph.shape.assert_is_compatible_with(diff_value.shape)
  next_state = state_ph + diff_value

  def model_rollout_fn(sess, state, action):
    return sess.run(next_state, feed_dict={state_ph: state, action_ph: action})

  return model_rollout_fn


def get_eval_step(uvf_agent,
                  state_preprocess,
                  tf_env,
                  action_fn,
                  meta_action_fn,
                  environment_steps,
                  num_episodes,
                  mode='eval'):
  """Get one-step policy/env stepping ops.

  Args:
    uvf_agent: A UVF agent.
    tf_env: A TFEnvironment.
    action_fn: A function to produce actions given current state.
    meta_action_fn: A function to produce meta actions given current state.
    environment_steps: A variable to count the number of steps in the tf_env.
    num_episodes: A variable to count the number of episodes.
    mode: a string representing the mode=[train, explore, eval].

  Returns:
    A collect_experience_op that excute an action and store into the
    replay_buffer
  """
  tf_env.start_collect()

  state = tf_env.current_obs()
  action = action_fn(state, context=None)
  state_repr = state_preprocess(state)

  action_spec = tf_env.action_spec()
  action_ph = tf.placeholder(dtype=action_spec.dtype, shape=action_spec.shape)
  with tf.control_dependencies([state]):
    transition_type, reward, discount = tf_env.step(action_ph)

  def increment_step():
    return environment_steps.assign_add(1)

  def increment_episode():
    return num_episodes.assign_add(1)

  def no_op_int():
    return tf.constant(0, dtype=tf.int64)

  step_cond = uvf_agent.step_cond_fn(state, action,
                                     transition_type,
                                     environment_steps, num_episodes)
  reset_episode_cond = uvf_agent.reset_episode_cond_fn(
      state, action,
      transition_type, environment_steps, num_episodes)
  reset_env_cond = uvf_agent.reset_env_cond_fn(state, action,
                                               transition_type,
                                               environment_steps, num_episodes)

  increment_step_op = tf.cond(step_cond, increment_step, no_op_int)
  with tf.control_dependencies([increment_step_op]):
    increment_episode_op = tf.cond(reset_episode_cond, increment_episode,
                                   no_op_int)

  with tf.control_dependencies([reward, discount]):
    next_state = tf_env.current_obs()
    next_state_repr = state_preprocess(next_state)

  with tf.control_dependencies([increment_episode_op]):
    post_reward, post_meta_reward = uvf_agent.cond_begin_episode_op(
        tf.logical_not(reset_episode_cond),
        [state, action_ph, reward, next_state,
         state_repr, next_state_repr],
        mode=mode, meta_action_fn=meta_action_fn)

  # Important: do manual reset after getting the final reward from the
  # unreset environment.
  with tf.control_dependencies([post_reward, post_meta_reward]):
    cond_reset_op = tf.cond(reset_env_cond,
                            tf_env.reset,
                            tf_env.current_time_step)

  # Add a dummy control dependency to force the reset_op to run
  with tf.control_dependencies(cond_reset_op):
    post_reward, post_meta_reward = map(tf.identity,
                                        [post_reward, post_meta_reward])

  eval_step = [next_state, action_ph, transition_type, post_reward,
               post_meta_reward, discount, uvf_agent.context_vars, state_repr]

  if callable(action):
    def step_fn(sess):
      action_value = action(sess)
      return sess.run(eval_step, feed_dict={action_ph: action_value})
  else:
    action = uvf_utils.clip_to_spec(action, action_spec)
    def step_fn(sess):
      action_value = sess.run(action)
      return sess.run(eval_step, feed_dict={action_ph: action_value})

  return step_fn


@gin.configurable
def evaluate(checkpoint_dir,
             eval_dir,
             environment=None,
             num_bin_actions=3,
             agent_class=None,
             meta_agent_class=None,
             state_preprocess_class=None,
             gamma=1.0,
             num_episodes_eval=10,
             eval_interval_secs=60,
             max_number_of_evaluations=None,
             checkpoint_timeout=None,
             timeout_fn=None,
             tuner_hook=None,
             generate_videos=False,
             generate_summaries=True,
             num_episodes_videos=5,
             video_settings=None,
             eval_modes=('eval',),
             eval_model_rollout=False,
             policy_save_dir='policy',
             checkpoint_range=None,
             checkpoint_path=None,
             max_steps_per_episode=None,
             evaluate_nohrl=False):
  """Loads and repeatedly evaluates a checkpointed model at a set interval.

  Args:
    checkpoint_dir: The directory where the checkpoints reside.
    eval_dir: Directory to save the evaluation summary results.
    environment: A BaseEnvironment to evaluate.
    num_bin_actions: Number of bins for discretizing continuous actions.
    agent_class: An RL agent class.
    meta_agent_class: A Meta agent class.
    gamma: Discount factor for the reward.
    num_episodes_eval: Number of episodes to evaluate and average reward over.
    eval_interval_secs: The number of seconds between each evaluation run.
    max_number_of_evaluations: The max number of evaluations. If None the
      evaluation continues indefinitely.
    checkpoint_timeout: The maximum amount of time to wait between checkpoints.
      If left as `None`, then the process will wait indefinitely.
    timeout_fn: Optional function to call after a timeout.
    tuner_hook: A callable that takes the average reward and global step and
      updates a Vizier tuner trial.
    generate_videos: Whether to generate videos of the agent in action.
    generate_summaries: Whether to generate summaries.
    num_episodes_videos: Number of episodes to evaluate for generating videos.
    video_settings: Settings for generating videos of the agent.
      optimal action based on the critic.
    eval_modes: A tuple of eval modes.
    eval_model_rollout: Evaluate model rollout.
    policy_save_dir: Optional sub-directory where the policies are
      saved.
    checkpoint_range: Optional. If provided, evaluate all checkpoints in
      the range.
    checkpoint_path: Optional sub-directory specifying which checkpoint to
      evaluate. If None, will evaluate the most recent checkpoint.
  """
  tf_env = create_maze_env.TFPyEnvironment(environment)
  observation_spec = [tf_env.observation_spec()]
  action_spec = [tf_env.action_spec()]

  assert max_steps_per_episode, 'max_steps_per_episode need to be set'

  if agent_class.ACTION_TYPE == 'discrete':
    assert False
  else:
    assert agent_class.ACTION_TYPE == 'continuous'

  if meta_agent_class is not None:
    assert agent_class.ACTION_TYPE == meta_agent_class.ACTION_TYPE
    with tf.variable_scope('meta_agent'):
      meta_agent = meta_agent_class(
          observation_spec,
          action_spec,
          tf_env,
      )
  else:
    meta_agent = None

  with tf.variable_scope('uvf_agent'):
    uvf_agent = agent_class(
        observation_spec,
        action_spec,
        tf_env,
    )
    uvf_agent.set_meta_agent(agent=meta_agent)

  with tf.variable_scope('state_preprocess'):
    state_preprocess = state_preprocess_class()

  # run both actor and critic once to ensure networks are initialized
  # and gin configs will be saved
  # pylint: disable=protected-access
  temp_states = tf.expand_dims(
      tf.zeros(
          dtype=uvf_agent._observation_spec.dtype,
          shape=uvf_agent._observation_spec.shape), 0)
  # pylint: enable=protected-access
  temp_actions = uvf_agent.actor_net(temp_states)
  uvf_agent.critic_net(temp_states, temp_actions)

  # create eval_step_fns for each action function
  eval_step_fns = dict()
  meta_agent = uvf_agent.meta_agent
  for meta in [True] + [False] * evaluate_nohrl:
    meta_tag = 'hrl' if meta else 'nohrl'
    uvf_agent.set_meta_agent(meta_agent if meta else None)
    for mode in eval_modes:
      # wrap environment
      wrapped_environment = uvf_agent.get_env_base_wrapper(
          environment, mode=mode)
      action_wrapper = lambda agent_: agent_.action
      action_fn = action_wrapper(uvf_agent)
      meta_action_fn = action_wrapper(meta_agent)
      eval_step_fns['%s_%s' % (mode, meta_tag)] = (get_eval_step(
          uvf_agent=uvf_agent,
          state_preprocess=state_preprocess,
          tf_env=tf_env,
          action_fn=action_fn,
          meta_action_fn=meta_action_fn,
          environment_steps=tf.Variable(
              0, dtype=tf.int64, name='environment_steps'),
          num_episodes=tf.Variable(0, dtype=tf.int64, name='num_episodes'),
          mode=mode), wrapped_environment,)

  model_rollout_fn = None
  if eval_model_rollout:
    model_rollout_fn = get_model_rollout(uvf_agent, tf_env)

  tf.train.get_or_create_global_step()

  if policy_save_dir:
    checkpoint_dir = os.path.join(checkpoint_dir, policy_save_dir)

  tf.logging.info('Evaluating policies at %s', checkpoint_dir)
  tf.logging.info('Running episodes for max %d steps', max_steps_per_episode)

  evaluate_checkpoint_fn = get_evaluate_checkpoint_fn(
      '', eval_dir, eval_step_fns, model_rollout_fn, gamma,
      max_steps_per_episode, num_episodes_eval, num_episodes_videos,
      tuner_hook, generate_videos, generate_summaries, video_settings)

  if checkpoint_path is not None:
    checkpoint_path = os.path.join(checkpoint_dir, checkpoint_path)
    evaluate_checkpoint_fn(checkpoint_path)
  elif checkpoint_range is not None:
    model_files = tf.gfile.Glob(
        os.path.join(checkpoint_dir, 'model.ckpt-*.index'))
    tf.logging.info('Found %s policies at %s', len(model_files), checkpoint_dir)
    model_files = {
        int(f.split('model.ckpt-', 1)[1].split('.', 1)[0]):
            os.path.splitext(f)[0]
        for f in model_files
    }
    model_files = {
        k: v
        for k, v in model_files.items()
        if k >= checkpoint_range[0] and k <= checkpoint_range[1]
    }
    tf.logging.info('Evaluating %d policies at %s',
                    len(model_files), checkpoint_dir)
    for _, checkpoint_path in sorted(model_files.items()):
      evaluate_checkpoint_fn(checkpoint_path)
  else:
    eval_utils.evaluate_checkpoint_repeatedly(
        checkpoint_dir,
        evaluate_checkpoint_fn,
        eval_interval_secs=eval_interval_secs,
        max_number_of_evaluations=max_number_of_evaluations,
        checkpoint_timeout=checkpoint_timeout,
        timeout_fn=timeout_fn)
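One detail worth calling out in evaluate() is the checkpoint_range path: checkpoint index files are globbed, mapped to their global-step numbers, filtered to the requested range, and evaluated in step order. The following pure-Python illustration (not part of the commit; the file names are made up for the example) shows that filename-to-step mapping and filtering:

    import os

    model_files = ['policy/model.ckpt-1000.index',
                   'policy/model.ckpt-2500.index',
                   'policy/model.ckpt-4000.index']
    # 'model.ckpt-1000.index' -> step 1000, path 'policy/model.ckpt-1000'
    by_step = {
        int(f.split('model.ckpt-', 1)[1].split('.', 1)[0]): os.path.splitext(f)[0]
        for f in model_files
    }
    checkpoint_range = (2000, 5000)
    by_step = {k: v for k, v in by_step.items()
               if checkpoint_range[0] <= k <= checkpoint_range[1]}
    print(sorted(by_step.items()))
    # [(2500, 'policy/model.ckpt-2500'), (4000, 'policy/model.ckpt-4000')]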
research/efficient-hrl/run_env.py  (new file, 0 → 100644)

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Random policy on an environment."""

import tensorflow as tf
import numpy as np
import random

from environments import create_maze_env

app = tf.app
flags = tf.flags
logging = tf.logging

FLAGS = flags.FLAGS

flags.DEFINE_string('env', 'AntMaze',
                    'environment name: AntMaze, AntPush, or AntFall')
flags.DEFINE_integer('episode_length', 500, 'episode length')
flags.DEFINE_integer('num_episodes', 50, 'number of episodes')


def get_goal_sample_fn(env_name):
  if env_name == 'AntMaze':
    # NOTE: When evaluating (i.e. the metrics shown in the paper,
    # we use the commented out goal sampling function.  The uncommented
    # one is only used for training.
    #return lambda: np.array([0., 16.])
    return lambda: np.random.uniform((-4, -4), (20, 20))
  elif env_name == 'AntPush':
    return lambda: np.array([0., 19.])
  elif env_name == 'AntFall':
    return lambda: np.array([0., 27., 4.5])
  else:
    assert False, 'Unknown env'


def get_reward_fn(env_name):
  if env_name == 'AntMaze':
    return lambda obs, goal: -np.sum(np.square(obs[:2] - goal)) ** 0.5
  elif env_name == 'AntPush':
    return lambda obs, goal: -np.sum(np.square(obs[:2] - goal)) ** 0.5
  elif env_name == 'AntFall':
    return lambda obs, goal: -np.sum(np.square(obs[:3] - goal)) ** 0.5
  else:
    assert False, 'Unknown env'


def success_fn(last_reward):
  return last_reward > -5.0


class EnvWithGoal(object):

  def __init__(self, base_env, env_name):
    self.base_env = base_env
    self.goal_sample_fn = get_goal_sample_fn(env_name)
    self.reward_fn = get_reward_fn(env_name)
    self.goal = None

  def reset(self):
    obs = self.base_env.reset()
    self.goal = self.goal_sample_fn()
    return np.concatenate([obs, self.goal])

  def step(self, a):
    obs, _, done, info = self.base_env.step(a)
    reward = self.reward_fn(obs, self.goal)
    return np.concatenate([obs, self.goal]), reward, done, info

  @property
  def action_space(self):
    return self.base_env.action_space


def run_environment(env_name, episode_length, num_episodes):
  env = EnvWithGoal(
      create_maze_env.create_maze_env(env_name).gym,
      env_name)

  def action_fn(obs):
    action_space = env.action_space
    action_space_mean = (action_space.low + action_space.high) / 2.0
    action_space_magn = (action_space.high - action_space.low) / 2.0
    random_action = (action_space_mean +
                     action_space_magn *
                     np.random.uniform(low=-1.0, high=1.0,
                                       size=action_space.shape))
    return random_action

  rewards = []
  successes = []
  for ep in range(num_episodes):
    rewards.append(0.0)
    successes.append(False)
    obs = env.reset()
    for _ in range(episode_length):
      obs, reward, done, _ = env.step(action_fn(obs))
      rewards[-1] += reward
      successes[-1] = success_fn(reward)
      if done:
        break
    logging.info('Episode %d reward: %.2f, Success: %d',
                 ep + 1, rewards[-1], successes[-1])

  logging.info('Average Reward over %d episodes: %.2f',
               num_episodes, np.mean(rewards))
  logging.info('Average Success over %d episodes: %.2f',
               num_episodes, np.mean(successes))


def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  run_environment(FLAGS.env, FLAGS.episode_length, FLAGS.num_episodes)


if __name__ == '__main__':
  app.run()
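To make the reward and success conventions above concrete: the AntMaze reward is the negative Euclidean distance from obs[:2] to the goal, and an episode counts as a success when its final reward is above -5. A small illustrative check, not part of the commit, assuming get_reward_fn and success_fn are imported from run_env:

    import numpy as np
    from run_env import get_reward_fn, success_fn

    reward_fn = get_reward_fn('AntMaze')
    obs = np.array([1.0, 2.0, 0.5])        # only obs[:2] is compared to the goal
    goal = np.array([0.0, 16.0])

    r = reward_fn(obs, goal)
    print(round(r, 2))                     # -14.04: about 14 units from the goal
    print(success_fn(r))                   # False
    print(success_fn(reward_fn(np.array([0.5, 13.0]), goal)))  # True: ~3 units away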
research/astronet/astronet/util/config_util_test.py → research/efficient-hrl/run_eval.py  (renamed)

-# Copyright 2018 The TensorFlow Authors.
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -11,8 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

-"""Tests for config_util.py."""
+r"""Script for evaluating a UVF agent.
+
+To run locally: See scripts/local_eval.py
+"""

from __future__ import absolute_import
from __future__ import division

@@ -20,43 +24,28 @@ from __future__ import print_function

import tensorflow as tf

-from astronet.util import config_util
-
-
-class ConfigUtilTest(tf.test.TestCase):
-
-  def testUnflatten(self):
-    # Empty dict.
-    self.assertDictEqual(config_util.unflatten({}), {})
-
-    # Already flat dict.
-    self.assertDictEqual(
-        config_util.unflatten({"a": 1, "b": 2}), {"a": 1, "b": 2})
-
-    # Nested dict.
-    self.assertDictEqual(
-        config_util.unflatten({
-            "a": 1,
-            "b.c": 2,
-            "b.d.e": 3,
-            "b.d.f": 4,
-        }), {"a": 1,
-             "b": {
-                 "c": 2,
-                 "d": {
-                     "e": 3,
-                     "f": 4,
-                 }
-             }})
+import gin.tf
+# pylint: disable=unused-import
+import eval as eval_
+# pylint: enable=unused-import

flags = tf.app.flags
FLAGS = flags.FLAGS


+def main(_):
+  tf.logging.set_verbosity(tf.logging.INFO)
+  assert FLAGS.checkpoint_dir, "Flag 'checkpoint_dir' must be set."
+  assert FLAGS.eval_dir, "Flag 'eval_dir' must be set."
+  if FLAGS.config_file:
+    for config_file in FLAGS.config_file:
+      gin.parse_config_file(config_file)
+  if FLAGS.params:
+    gin.parse_config(FLAGS.params)
+  eval_.evaluate(FLAGS.checkpoint_dir, FLAGS.eval_dir)
+
+
if __name__ == "__main__":
- tf.test.main()
+ tf.app.run()
research/astronet/astrowavenet/data/kepler_light_curves.py → research/efficient-hrl/run_train.py  (renamed)

-# Copyright 2018 The TensorFlow Authors.
+# Copyright 2018 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -11,8 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

-"""Kepler light curve inputs to the AstroWaveNet model."""
+r"""Script for training an RL agent using the UVF algorithm.
+
+To run locally: See scripts/local_train.py
+"""

from __future__ import absolute_import
from __future__ import division

@@ -20,31 +24,26 @@ from __future__ import print_function

import tensorflow as tf

-from astrowavenet.data import base
+import gin.tf
+# pylint: enable=unused-import
+import train
+# pylint: disable=unused-import

-COND_INPUT_KEY = "mask"
-AR_INPUT_KEY = "flux"
+flags = tf.app.flags
+FLAGS = flags.FLAGS


-class KeplerLightCurves(base.TFRecordDataset):
-  """Kepler light curve inputs to the AstroWaveNet model."""
+def main(_):
+  tf.logging.set_verbosity(tf.logging.INFO)
+  if FLAGS.config_file:
+    for config_file in FLAGS.config_file:
+      gin.parse_config_file(config_file)
+  if FLAGS.params:
+    gin.parse_config(FLAGS.params)

-  def create_example_parser(self):
+  assert FLAGS.train_dir, "Flag 'train_dir' must be set."
+  return train.train_uvf(FLAGS.train_dir)

-    def _example_parser(serialized):
-      """Parses a single tf.Example proto."""
-      features = tf.parse_single_example(
-          serialized,
-          features={
-              AR_INPUT_KEY: tf.VarLenFeature(tf.float32),
-              COND_INPUT_KEY: tf.VarLenFeature(tf.int64),
-          })
-      # Extract values from SparseTensor objects.
-      autoregressive_input = features[AR_INPUT_KEY].values
-      conditioning_stack = tf.to_float(features[COND_INPUT_KEY].values)
-      return {
-          "autoregressive_input": autoregressive_input,
-          "conditioning_stack": conditioning_stack,
-      }
-
-    return _example_parser

if __name__ == '__main__':
  tf.app.run()
research/efficient-hrl/scripts/local_eval.py  (new file, 0 → 100644)

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Script to run run_eval.py locally.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from subprocess import call
import sys

CONFIGS_PATH = 'configs'
CONTEXT_CONFIGS_PATH = 'context/configs'

def main():
  bb = './'
  base_num_args = 6
  if len(sys.argv) < base_num_args:
    print(
        "usage: python %s <exp_name> <context_setting_gin> <context_gin> "
        "<agent_gin> <suite> [params...]"
        % sys.argv[0])
    sys.exit(0)
  exp = sys.argv[1]
  context_setting = sys.argv[2]
  context = sys.argv[3]
  agent = sys.argv[4]
  assert sys.argv[5] in ["suite"], "args[5] must be `suite'"
  suite = ""
  binary = "python {bb}/run_eval{suite}.py ".format(bb=bb, suite=suite)

  h = os.environ["HOME"]
  ucp = CONFIGS_PATH
  ccp = CONTEXT_CONFIGS_PATH
  extra = ''
  command_str = ("{binary} "
                 "--logtostderr "
                 "--checkpoint_dir={h}/tmp/{context_setting}/{context}/{agent}/{exp}/train "
                 "--eval_dir={h}/tmp/{context_setting}/{context}/{agent}/{exp}/eval "
                 "--config_file={ucp}/{agent}.gin "
                 "--config_file={ucp}/eval_{extra}uvf.gin "
                 "--config_file={ccp}/{context_setting}.gin "
                 "--config_file={ccp}/{context}.gin ").format(
                     h=h,
                     ucp=ucp,
                     ccp=ccp,
                     context_setting=context_setting,
                     context=context,
                     agent=agent,
                     extra=extra,
                     suite=suite,
                     exp=exp,
                     binary=binary)
  for extra_arg in sys.argv[base_num_args:]:
    command_str += "--params='%s' " % extra_arg

  print(command_str)
  call(command_str, shell=True)

if __name__ == "__main__":
  main()
research/efficient-hrl/scripts/local_train.py  (new file, 0 → 100644)

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Script to run run_train.py locally.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
from subprocess import call
import sys

CONFIGS_PATH = './configs'
CONTEXT_CONFIGS_PATH = './context/configs'

def main():
  bb = './'
  base_num_args = 6
  if len(sys.argv) < base_num_args:
    print(
        "usage: python %s <exp_name> <context_setting_gin> <env_context_gin> "
        "<agent_gin> <suite> [params...]"
        % sys.argv[0])
    sys.exit(0)
  exp = sys.argv[1]  # Name for experiment, e.g. 'test001'
  context_setting = sys.argv[2]  # Context setting, e.g. 'hiro_orig'
  context = sys.argv[3]  # Environment-specific context, e.g. 'ant_maze'
  agent = sys.argv[4]  # Agent settings, e.g. 'base_uvf'
  assert sys.argv[5] in ["suite"], "args[5] must be `suite'"
  suite = ""
  binary = "python {bb}/run_train{suite}.py ".format(bb=bb, suite=suite)

  h = os.environ["HOME"]
  ucp = CONFIGS_PATH
  ccp = CONTEXT_CONFIGS_PATH
  extra = ''
  port = random.randint(2000, 8000)
  command_str = ("{binary} "
                 "--train_dir={h}/tmp/{context_setting}/{context}/{agent}/{exp}/train "
                 "--config_file={ucp}/{agent}.gin "
                 "--config_file={ucp}/train_{extra}uvf.gin "
                 "--config_file={ccp}/{context_setting}.gin "
                 "--config_file={ccp}/{context}.gin "
                 "--summarize_gradients=False "
                 "--save_interval_secs=60 "
                 "--save_summaries_secs=1 "
                 "--master=local "
                 "--alsologtostderr ").format(
                     h=h,
                     ucp=ucp,
                     context_setting=context_setting,
                     context=context,
                     ccp=ccp,
                     suite=suite,
                     agent=agent,
                     extra=extra,
                     exp=exp,
                     binary=binary,
                     port=port)
  for extra_arg in sys.argv[base_num_args:]:
    command_str += "--params='%s' " % extra_arg

  print(command_str)
  call(command_str, shell=True)

if __name__ == "__main__":
  main()
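Tying the two launcher scripts together: following the usage string and the example values named in the comments above, a local training run could be started as below (illustrative only, run from research/efficient-hrl; local_eval.py takes the same positional arguments for evaluation).

    from subprocess import call

    # exp name, context setting, env context, agent config, then the literal
    # word "suite"; anything after that is forwarded as a --params='...' binding.
    call("python scripts/local_train.py test001 hiro_orig ant_maze base_uvf suite",
         shell=True)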
research/efficient-hrl/train.py
0 → 100644
View file @
65da497f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r
"""Script for training an RL agent using the UVF algorithm.
To run locally: See run_train.py
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
time
import
tensorflow
as
tf
slim
=
tf
.
contrib
.
slim
import
gin.tf
# pylint: disable=unused-import
import
train_utils
import
agent
as
agent_
from
agents
import
circular_buffer
from
utils
import
utils
as
uvf_utils
from
environments
import
create_maze_env
# pylint: enable=unused-import
flags
=
tf
.
app
.
flags
FLAGS
=
flags
.
FLAGS
flags
.
DEFINE_string
(
'goal_sample_strategy'
,
'sample'
,
'None, sample, FuN'
)
LOAD_PATH
=
None
def
collect_experience
(
tf_env
,
agent
,
meta_agent
,
state_preprocess
,
replay_buffer
,
meta_replay_buffer
,
action_fn
,
meta_action_fn
,
environment_steps
,
num_episodes
,
num_resets
,
episode_rewards
,
episode_meta_rewards
,
store_context
,
disable_agent_reset
):
"""Collect experience in a tf_env into a replay_buffer using action_fn.
Args:
tf_env: A TFEnvironment.
agent: A UVF agent.
meta_agent: A Meta Agent.
replay_buffer: A Replay buffer to collect experience in.
meta_replay_buffer: A Replay buffer to collect meta agent experience in.
action_fn: A function to produce actions given current state.
meta_action_fn: A function to produce meta actions given current state.
environment_steps: A variable to count the number of steps in the tf_env.
num_episodes: A variable to count the number of episodes.
num_resets: A variable to count the number of resets.
store_context: A boolean to check if store context in replay.
disable_agent_reset: A boolean that disables agent from resetting.
Returns:
A collect_experience_op that excute an action and store into the
replay_buffers
"""
  tf_env.start_collect()
  state = tf_env.current_obs()
  state_repr = state_preprocess(state)
  action = action_fn(state, context=None)

  with tf.control_dependencies([state]):
    transition_type, reward, discount = tf_env.step(action)

  def increment_step():
    return environment_steps.assign_add(1)

  def increment_episode():
    return num_episodes.assign_add(1)

  def increment_reset():
    return num_resets.assign_add(1)

  def update_episode_rewards(context_reward, meta_reward, reset):
    new_episode_rewards = tf.concat(
        [episode_rewards[:1] + context_reward, episode_rewards[1:]], 0)
    new_episode_meta_rewards = tf.concat(
        [episode_meta_rewards[:1] + meta_reward,
         episode_meta_rewards[1:]], 0)
    return tf.group(
        episode_rewards.assign(
            tf.cond(reset,
                    lambda: tf.concat([[0.], episode_rewards[:-1]], 0),
                    lambda: new_episode_rewards)),
        episode_meta_rewards.assign(
            tf.cond(reset,
                    lambda: tf.concat([[0.], episode_meta_rewards[:-1]], 0),
                    lambda: new_episode_meta_rewards)))

  def no_op_int():
    return tf.constant(0, dtype=tf.int64)

  step_cond = agent.step_cond_fn(state, action,
                                 transition_type,
                                 environment_steps, num_episodes)
  reset_episode_cond = agent.reset_episode_cond_fn(
      state, action,
      transition_type, environment_steps, num_episodes)
  reset_env_cond = agent.reset_env_cond_fn(state, action,
                                           transition_type,
                                           environment_steps, num_episodes)

  increment_step_op = tf.cond(step_cond, increment_step, no_op_int)
  increment_episode_op = tf.cond(reset_episode_cond, increment_episode,
                                 no_op_int)
  increment_reset_op = tf.cond(reset_env_cond, increment_reset, no_op_int)
  increment_op = tf.group(increment_step_op, increment_episode_op,
                          increment_reset_op)

  with tf.control_dependencies([increment_op, reward, discount]):
    next_state = tf_env.current_obs()
    next_state_repr = state_preprocess(next_state)
    next_reset_episode_cond = tf.logical_or(
        agent.reset_episode_cond_fn(
            state, action,
            transition_type, environment_steps, num_episodes),
        tf.equal(discount, 0.0))

  if store_context:
    context = [tf.identity(var) + tf.zeros_like(var)
               for var in agent.context_vars]
    meta_context = [tf.identity(var) + tf.zeros_like(var)
                    for var in meta_agent.context_vars]
  else:
    context = []
    meta_context = []
  with tf.control_dependencies([next_state] + context + meta_context):
    if disable_agent_reset:
      collect_experience_ops = [tf.no_op()]  # don't reset agent
    else:
      collect_experience_ops = agent.cond_begin_episode_op(
          tf.logical_not(reset_episode_cond),
          [state, action, reward, next_state,
           state_repr, next_state_repr],
          mode='explore', meta_action_fn=meta_action_fn)
      context_reward, meta_reward = collect_experience_ops
      collect_experience_ops = list(collect_experience_ops)
      collect_experience_ops.append(
          update_episode_rewards(tf.reduce_sum(context_reward), meta_reward,
                                 reset_episode_cond))

  meta_action_every_n = agent.tf_context.meta_action_every_n
  with tf.control_dependencies(collect_experience_ops):
    transition = [state, action, reward, discount, next_state]

    meta_action = tf.to_float(
        tf.concat(context, -1))  # Meta agent action is low-level context

    meta_end = tf.logical_and(  # End of meta-transition.
        tf.equal(agent.tf_context.t % meta_action_every_n, 1),
        agent.tf_context.t > 1)
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
      states_var = tf.get_variable('states_var',
                                   [meta_action_every_n, state.shape[-1]],
                                   state.dtype)
      actions_var = tf.get_variable('actions_var',
                                    [meta_action_every_n, action.shape[-1]],
                                    action.dtype)
      state_var = tf.get_variable('state_var', state.shape, state.dtype)
      reward_var = tf.get_variable('reward_var', reward.shape, reward.dtype)
      meta_action_var = tf.get_variable('meta_action_var',
                                        meta_action.shape, meta_action.dtype)
      meta_context_var = [
          tf.get_variable('meta_context_var%d' % idx,
                          meta_context[idx].shape, meta_context[idx].dtype)
          for idx in range(len(meta_context))]

    actions_var_upd = tf.scatter_update(
        actions_var, (agent.tf_context.t - 2) % meta_action_every_n, action)
    with tf.control_dependencies([actions_var_upd]):
      actions = tf.identity(actions_var) + tf.zeros_like(actions_var)
      meta_reward = tf.identity(meta_reward) + tf.zeros_like(meta_reward)
      meta_reward = tf.reshape(meta_reward, reward.shape)

    reward = 0.1 * meta_reward
    meta_transition = [state_var, meta_action_var,
                       reward_var + reward,
                       discount * (1 - tf.to_float(next_reset_episode_cond)),
                       next_state]
    meta_transition.extend([states_var, actions])
    if store_context:  # store current and next context into replay
      transition += context + list(agent.context_vars)
      meta_transition += meta_context_var + list(meta_agent.context_vars)

    meta_step_cond = tf.squeeze(
        tf.logical_and(step_cond,
                       tf.logical_or(next_reset_episode_cond, meta_end)))

    collect_experience_op = tf.group(
        replay_buffer.maybe_add(transition, step_cond),
        meta_replay_buffer.maybe_add(meta_transition, meta_step_cond),
    )

  with tf.control_dependencies([collect_experience_op]):
    collect_experience_op = tf.cond(reset_env_cond,
                                    tf_env.reset,
                                    tf_env.current_time_step)

  meta_period = tf.equal(agent.tf_context.t % meta_action_every_n, 1)
  states_var_upd = tf.scatter_update(
      states_var, (agent.tf_context.t - 1) % meta_action_every_n,
      next_state)
  state_var_upd = tf.assign(
      state_var,
      tf.cond(meta_period, lambda: next_state, lambda: state_var))
  reward_var_upd = tf.assign(
      reward_var,
      tf.cond(meta_period,
              lambda: tf.zeros_like(reward_var),
              lambda: reward_var + reward))
  meta_action = tf.to_float(tf.concat(agent.context_vars, -1))
  meta_action_var_upd = tf.assign(
      meta_action_var,
      tf.cond(meta_period, lambda: meta_action, lambda: meta_action_var))
  meta_context_var_upd = [
      tf.assign(
          meta_context_var[idx],
          tf.cond(meta_period,
                  lambda: meta_agent.context_vars[idx],
                  lambda: meta_context_var[idx]))
      for idx in range(len(meta_context))]

  return tf.group(
      collect_experience_op,
      states_var_upd,
      state_var_upd,
      reward_var_upd,
      meta_action_var_upd,
      *meta_context_var_upd)
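
# --- Illustrative sketch (not part of the committed file) ---
# collect_experience() above only emits a meta-transition every
# `meta_action_every_n` low-level steps: the meta-action is the goal held
# fixed over the window, and the meta-reward accumulates the (scaled) rewards
# collected during that window. A minimal eager-Python analogue of that
# bookkeeping, with hypothetical `env`, `goal_fn` and `policy_fn` callables:
def collect_meta_transitions_sketch(env, goal_fn, policy_fn, meta_every_n=10,
                                    num_steps=100, reward_scale=0.1):
  meta_transitions = []
  state = env.reset()
  goal, start_state, reward_sum = goal_fn(state), state, 0.0
  for t in range(1, num_steps + 1):
    next_state, reward, done = env.step(policy_fn(state, goal))
    reward_sum += reward_scale * reward
    if t % meta_every_n == 0 or done:
      # One meta-transition per window: (s_0, goal, summed reward, s_c).
      meta_transitions.append((start_state, goal, reward_sum, next_state))
      goal, start_state, reward_sum = goal_fn(next_state), next_state, 0.0
    state = env.reset() if done else next_state
  return meta_transitions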
def sample_best_meta_actions(state_reprs, next_state_reprs, prev_meta_actions,
                             low_states, low_actions, low_state_reprs,
                             inverse_dynamics, uvf_agent, k=10):
  """Return meta-actions which approximately maximize low-level log-probs."""
  sampled_actions = inverse_dynamics.sample(state_reprs, next_state_reprs, k,
                                            prev_meta_actions)
  sampled_actions = tf.stop_gradient(sampled_actions)
  sampled_log_probs = tf.reshape(uvf_agent.log_probs(
      tf.tile(low_states, [k, 1, 1]),
      tf.tile(low_actions, [k, 1, 1]),
      tf.tile(low_state_reprs, [k, 1, 1]),
      [tf.reshape(sampled_actions, [-1, sampled_actions.shape[-1]])]),
                                 [k, low_states.shape[0],
                                  low_states.shape[1], -1])
  fitness = tf.reduce_sum(sampled_log_probs, [2, 3])
  best_actions = tf.argmax(fitness, 0)
  actions = tf.gather_nd(
      sampled_actions,
      tf.stack([best_actions,
                tf.range(prev_meta_actions.shape[0], dtype=tf.int64)], -1))
  return actions
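
# --- Illustrative sketch (not part of the committed file) ---
# sample_best_meta_actions() is the off-policy goal re-labelling step: draw k
# candidate goals, score each candidate by the summed log-probability the
# low-level policy assigns to the actions that were actually taken, and keep
# the highest-scoring goal. A plain NumPy version for a single meta-window,
# assuming a hypothetical `log_prob(goal, states, actions)` scoring function:
def pick_best_goal_sketch(candidate_goals, low_states, low_actions, log_prob):
  """Scores k candidate goals and returns the argmax (example only)."""
  import numpy as np  # local import; the sketch is self-contained
  scores = np.array([np.sum(log_prob(g, low_states, low_actions))
                     for g in candidate_goals])
  return candidate_goals[int(np.argmax(scores))]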
@gin.configurable
def train_uvf(train_dir,
              environment=None,
              num_bin_actions=3,
              agent_class=None,
              meta_agent_class=None,
              state_preprocess_class=None,
              inverse_dynamics_class=None,
              exp_action_wrapper=None,
              replay_buffer=None,
              meta_replay_buffer=None,
              replay_num_steps=1,
              meta_replay_num_steps=1,
              critic_optimizer=None,
              actor_optimizer=None,
              meta_critic_optimizer=None,
              meta_actor_optimizer=None,
              repr_optimizer=None,
              relabel_contexts=False,
              meta_relabel_contexts=False,
              batch_size=64,
              repeat_size=0,
              num_episodes_train=2000,
              initial_episodes=2,
              initial_steps=None,
              num_updates_per_observation=1,
              num_collect_per_update=1,
              num_collect_per_meta_update=1,
              gamma=1.0,
              meta_gamma=1.0,
              reward_scale_factor=1.0,
              target_update_period=1,
              should_stop_early=None,
              clip_gradient_norm=0.0,
              summarize_gradients=False,
              debug_summaries=False,
              log_every_n_steps=100,
              prefetch_queue_capacity=2,
              policy_save_dir='policy',
              save_policy_every_n_steps=1000,
              save_policy_interval_secs=0,
              replay_context_ratio=0.0,
              next_state_as_context_ratio=0.0,
              state_index=0,
              zero_timer_ratio=0.0,
              timer_index=-1,
              debug=False,
              max_policies_to_save=None,
              max_steps_per_episode=None,
              load_path=LOAD_PATH):
  """Train an agent."""
  tf_env = create_maze_env.TFPyEnvironment(environment)
  observation_spec = [tf_env.observation_spec()]
  action_spec = [tf_env.action_spec()]

  max_steps_per_episode = (max_steps_per_episode or
                           tf_env.pyenv.max_episode_steps)

  assert max_steps_per_episode, 'max_steps_per_episode need to be set'

  if initial_steps is None:
    initial_steps = initial_episodes * max_steps_per_episode

  if agent_class.ACTION_TYPE == 'discrete':
    assert False
  else:
    assert agent_class.ACTION_TYPE == 'continuous'

  assert agent_class.ACTION_TYPE == meta_agent_class.ACTION_TYPE
  with tf.variable_scope('meta_agent'):
    meta_agent = meta_agent_class(
        observation_spec,
        action_spec,
        tf_env,
        debug_summaries=debug_summaries)
  meta_agent.set_replay(replay=meta_replay_buffer)

  with tf.variable_scope('uvf_agent'):
    uvf_agent = agent_class(
        observation_spec,
        action_spec,
        tf_env,
        debug_summaries=debug_summaries)
    uvf_agent.set_meta_agent(agent=meta_agent)
    uvf_agent.set_replay(replay=replay_buffer)

  with tf.variable_scope('state_preprocess'):
    state_preprocess = state_preprocess_class()

  with tf.variable_scope('inverse_dynamics'):
    inverse_dynamics = inverse_dynamics_class(
        meta_agent.sub_context_as_action_specs[0])

  # Create counter variables
  global_step = tf.contrib.framework.get_or_create_global_step()
  num_episodes = tf.Variable(0, dtype=tf.int64, name='num_episodes')
  num_resets = tf.Variable(0, dtype=tf.int64, name='num_resets')
  num_updates = tf.Variable(0, dtype=tf.int64, name='num_updates')
  num_meta_updates = tf.Variable(0, dtype=tf.int64, name='num_meta_updates')
  episode_rewards = tf.Variable([0.] * 100, name='episode_rewards')
  episode_meta_rewards = tf.Variable([0.] * 100, name='episode_meta_rewards')

  # Create counter variables summaries
  train_utils.create_counter_summaries([
      ('environment_steps', global_step),
      ('num_episodes', num_episodes),
      ('num_resets', num_resets),
      ('num_updates', num_updates),
      ('num_meta_updates', num_meta_updates),
      ('replay_buffer_adds', replay_buffer.get_num_adds()),
      ('meta_replay_buffer_adds', meta_replay_buffer.get_num_adds()),
  ])

  tf.summary.scalar('avg_episode_rewards',
                    tf.reduce_mean(episode_rewards[1:]))
  tf.summary.scalar('avg_episode_meta_rewards',
                    tf.reduce_mean(episode_meta_rewards[1:]))
  tf.summary.histogram('episode_rewards', episode_rewards[1:])
  tf.summary.histogram('episode_meta_rewards', episode_meta_rewards[1:])

  # Create init ops
  action_fn = uvf_agent.action
  action_fn = uvf_agent.add_noise_fn(action_fn, global_step=None)
  meta_action_fn = meta_agent.action
  meta_action_fn = meta_agent.add_noise_fn(meta_action_fn, global_step=None)
  meta_actions_fn = meta_agent.actions
  meta_actions_fn = meta_agent.add_noise_fn(meta_actions_fn, global_step=None)
  init_collect_experience_op = collect_experience(
      tf_env,
      uvf_agent,
      meta_agent,
      state_preprocess,
      replay_buffer,
      meta_replay_buffer,
      action_fn,
      meta_action_fn,
      environment_steps=global_step,
      num_episodes=num_episodes,
      num_resets=num_resets,
      episode_rewards=episode_rewards,
      episode_meta_rewards=episode_meta_rewards,
      store_context=True,
      disable_agent_reset=False,
  )

  # Create train ops
  collect_experience_op = collect_experience(
      tf_env,
      uvf_agent,
      meta_agent,
      state_preprocess,
      replay_buffer,
      meta_replay_buffer,
      action_fn,
      meta_action_fn,
      environment_steps=global_step,
      num_episodes=num_episodes,
      num_resets=num_resets,
      episode_rewards=episode_rewards,
      episode_meta_rewards=episode_meta_rewards,
      store_context=True,
      disable_agent_reset=False,
  )

  train_op_list = []
  repr_train_op = tf.constant(0.0)
  for mode in ['meta', 'nometa']:
    if mode == 'meta':
      agent = meta_agent
      buff = meta_replay_buffer
      critic_opt = meta_critic_optimizer
      actor_opt = meta_actor_optimizer
      relabel = meta_relabel_contexts
      num_steps = meta_replay_num_steps
      my_gamma = meta_gamma  # (stray trailing comma removed; was a 1-tuple)
      n_updates = num_meta_updates
    else:
      agent = uvf_agent
      buff = replay_buffer
      critic_opt = critic_optimizer
      actor_opt = actor_optimizer
      relabel = relabel_contexts
      num_steps = replay_num_steps
      my_gamma = gamma
      n_updates = num_updates

    with tf.name_scope(mode):
      batch = buff.get_random_batch(batch_size, num_steps=num_steps)
      states, actions, rewards, discounts, next_states = batch[:5]
      with tf.name_scope('Reward'):
        tf.summary.scalar('average_step_reward', tf.reduce_mean(rewards))
      rewards *= reward_scale_factor
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [states, actions, rewards, discounts, next_states] + batch[5:],
          capacity=prefetch_queue_capacity,
          name='batch_queue')

      batch_dequeue = batch_queue.dequeue()
      if repeat_size > 0:
        batch_dequeue = [
            tf.tile(batch, (repeat_size + 1,) + (1,) * (batch.shape.ndims - 1))
            for batch in batch_dequeue
        ]
        batch_size *= (repeat_size + 1)
      states, actions, rewards, discounts, next_states = batch_dequeue[:5]
      if mode == 'meta':
        low_states = batch_dequeue[5]
        low_actions = batch_dequeue[6]
        low_state_reprs = state_preprocess(low_states)
      state_reprs = state_preprocess(states)
      next_state_reprs = state_preprocess(next_states)

      if mode == 'meta':  # Re-label meta-action
        prev_actions = actions
        if FLAGS.goal_sample_strategy == 'None':
          pass
        elif FLAGS.goal_sample_strategy == 'FuN':
          actions = inverse_dynamics.sample(state_reprs, next_state_reprs,
                                            1, prev_actions, sc=0.1)
          actions = tf.stop_gradient(actions)
        elif FLAGS.goal_sample_strategy == 'sample':
          actions = sample_best_meta_actions(
              state_reprs, next_state_reprs, prev_actions,
              low_states, low_actions, low_state_reprs,
              inverse_dynamics, uvf_agent, k=10)
        else:
          assert False

      if state_preprocess.trainable and mode == 'meta':
        # Representation learning is based on meta-transitions, but is trained
        # along with low-level policy updates.
        repr_loss, _, _ = state_preprocess.loss(states, next_states,
                                                low_actions, low_states)
        repr_train_op = slim.learning.create_train_op(
            repr_loss,
            repr_optimizer,
            global_step=None,
            update_ops=None,
            summarize_gradients=summarize_gradients,
            clip_gradient_norm=clip_gradient_norm,
            variables_to_train=state_preprocess.get_trainable_vars(),)

      # Get contexts for training
      contexts, next_contexts = agent.sample_contexts(
          mode='train', batch_size=batch_size,
          state=states, next_state=next_states,
      )
      if not relabel:  # Re-label context (in the style of TDM or HER).
        contexts, next_contexts = (
            batch_dequeue[-2 * len(contexts):-1 * len(contexts)],
            batch_dequeue[-1 * len(contexts):])

      merged_states = agent.merged_states(states, contexts)
      merged_next_states = agent.merged_states(next_states, next_contexts)
      if mode == 'nometa':
        context_rewards, context_discounts = agent.compute_rewards(
            'train', state_reprs, actions, rewards, next_state_reprs, contexts)
      elif mode == 'meta':
        # Meta-agent uses sum of rewards, not context-specific rewards.
        _, context_discounts = agent.compute_rewards(
            'train', states, actions, rewards, next_states, contexts)
        context_rewards = rewards

      if agent.gamma_index is not None:
        context_discounts *= tf.cast(
            tf.reshape(contexts[agent.gamma_index], (-1,)),
            dtype=context_discounts.dtype)
      else:
        context_discounts *= my_gamma

      critic_loss = agent.critic_loss(merged_states, actions,
                                      context_rewards, context_discounts,
                                      merged_next_states)

      critic_loss = tf.reduce_mean(critic_loss)

      actor_loss = agent.actor_loss(merged_states, actions,
                                    context_rewards, context_discounts,
                                    merged_next_states)
      actor_loss *= tf.to_float(  # Only update actor every N steps.
          tf.equal(n_updates % target_update_period, 0))

      critic_train_op = slim.learning.create_train_op(
          critic_loss,
          critic_opt,
          global_step=n_updates,
          update_ops=None,
          summarize_gradients=summarize_gradients,
          clip_gradient_norm=clip_gradient_norm,
          variables_to_train=agent.get_trainable_critic_vars(),)
      critic_train_op = uvf_utils.tf_print(
          critic_train_op, [critic_train_op],
          message='critic_loss',
          print_freq=1000,
          name='critic_loss')
      train_op_list.append(critic_train_op)
      if actor_loss is not None:
        actor_train_op = slim.learning.create_train_op(
            actor_loss,
            actor_opt,
            global_step=None,
            update_ops=None,
            summarize_gradients=summarize_gradients,
            clip_gradient_norm=clip_gradient_norm,
            variables_to_train=agent.get_trainable_actor_vars(),)
        actor_train_op = uvf_utils.tf_print(
            actor_train_op, [actor_train_op],
            message='actor_loss',
            print_freq=1000,
            name='actor_loss')
        train_op_list.append(actor_train_op)

  assert len(train_op_list) == 4
  # Update targets should happen after the networks have been updated.
  with tf.control_dependencies(train_op_list[2:]):
    update_targets_op = uvf_utils.periodically(
        uvf_agent.update_targets, target_update_period, 'update_targets')
  if meta_agent is not None:
    with tf.control_dependencies(train_op_list[:2]):
      update_meta_targets_op = uvf_utils.periodically(
          meta_agent.update_targets, target_update_period, 'update_targets')

  assert_op = tf.Assert(  # Hack to get training to stop.
      tf.less_equal(global_step,
                    200 + num_episodes_train * max_steps_per_episode),
      [global_step])
  with tf.control_dependencies([update_targets_op, assert_op]):
    train_op = tf.add_n(train_op_list[2:], name='post_update_targets')
    # Representation training steps on every low-level policy training step.
    train_op += repr_train_op
  with tf.control_dependencies([update_meta_targets_op, assert_op]):
    meta_train_op = tf.add_n(train_op_list[:2],
                             name='post_update_meta_targets')
  if debug_summaries:
    # Fixed undefined name: gen_debug_batch_summaries lives in train_utils.
    train_utils.gen_debug_batch_summaries(batch)
    slim.summaries.add_histogram_summaries(
        uvf_agent.get_trainable_critic_vars(), 'critic_vars')
    slim.summaries.add_histogram_summaries(
        uvf_agent.get_trainable_actor_vars(), 'actor_vars')

  train_ops = train_utils.TrainOps(train_op, meta_train_op,
                                   collect_experience_op)

  policy_save_path = os.path.join(train_dir, policy_save_dir, 'model.ckpt')
  policy_vars = uvf_agent.get_actor_vars() + meta_agent.get_actor_vars() + [
      global_step, num_episodes, num_resets
  ] + list(uvf_agent.context_vars) + list(meta_agent.context_vars) + (
      state_preprocess.get_trainable_vars())
  # add critic vars, since some test evaluation depends on them
  policy_vars += (uvf_agent.get_trainable_critic_vars() +
                  meta_agent.get_trainable_critic_vars())
  policy_saver = tf.train.Saver(
      policy_vars, max_to_keep=max_policies_to_save, sharded=False)

  lowlevel_vars = (uvf_agent.get_actor_vars() +
                   uvf_agent.get_trainable_critic_vars() +
                   state_preprocess.get_trainable_vars())
  lowlevel_saver = tf.train.Saver(lowlevel_vars)

  def policy_save_fn(sess):
    policy_saver.save(
        sess, policy_save_path, global_step=global_step,
        write_meta_graph=False)
    if save_policy_interval_secs > 0:
      tf.logging.info(
          'Wait %d secs after save policy.' % save_policy_interval_secs)
      time.sleep(save_policy_interval_secs)

  train_step_fn = train_utils.TrainStep(
      max_number_of_steps=num_episodes_train * max_steps_per_episode + 100,
      num_updates_per_observation=num_updates_per_observation,
      num_collect_per_update=num_collect_per_update,
      num_collect_per_meta_update=num_collect_per_meta_update,
      log_every_n_steps=log_every_n_steps,
      policy_save_fn=policy_save_fn,
      save_policy_every_n_steps=save_policy_every_n_steps,
      should_stop_early=should_stop_early).train_step

  local_init_op = tf.local_variables_initializer()
  init_targets_op = tf.group(uvf_agent.update_targets(1.0),
                             meta_agent.update_targets(1.0))

  def initialize_training_fn(sess):
    """Initialize training function."""
    sess.run(local_init_op)
    sess.run(init_targets_op)
    if load_path:
      tf.logging.info('Restoring low-level from %s' % load_path)
      lowlevel_saver.restore(sess, load_path)
    global_step_value = sess.run(global_step)
    assert global_step_value == 0, 'Global step should be zero.'
    collect_experience_call = sess.make_callable(
        init_collect_experience_op)

    for _ in range(initial_steps):
      collect_experience_call()

  train_saver = tf.train.Saver(max_to_keep=2, sharded=True)
  tf.logging.info('train dir: %s', train_dir)
  return slim.learning.train(
      train_ops,
      train_dir,
      train_step_fn=train_step_fn,
      save_interval_secs=FLAGS.save_interval_secs,
      saver=train_saver,
      log_every_n_steps=0,
      global_step=global_step,
      master="",
      is_chief=(FLAGS.task == 0),
      save_summaries_secs=FLAGS.save_summaries_secs,
      init_fn=initialize_training_fn)
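
# --- Illustrative sketch (not part of the committed file) ---
# train_uvf() is @gin.configurable, so in practice most of its keyword
# arguments are supplied through gin bindings (see the --config_file/--params
# flags in train_utils.py) rather than in Python. A hypothetical binding set,
# with made-up values, could be parsed like this:
def _example_gin_bindings_sketch():
  """Parses a hypothetical set of gin bindings for train_uvf (example only)."""
  gin.parse_config([
      "train_uvf.batch_size = 64",
      "train_uvf.num_episodes_train = 2000",
      "train_uvf.gamma = 0.99",
      "train_uvf.meta_gamma = 0.99",
  ])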
research/efficient-hrl/train_utils.py
0 → 100644
View file @
65da497f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r""""""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import os
import time

import tensorflow as tf

import gin.tf

flags = tf.app.flags

flags.DEFINE_multi_string('config_file', None,
                          'List of paths to the config files.')
flags.DEFINE_multi_string('params', None,
                          'Newline separated list of Gin parameter bindings.')

flags.DEFINE_string('train_dir', None,
                    'Directory for writing logs/summaries during training.')
flags.DEFINE_string('master', 'local',
                    'BNS name of the TensorFlow master to use.')
flags.DEFINE_integer('task', 0, 'task id')
flags.DEFINE_integer('save_interval_secs', 300, 'The frequency at which '
                     'checkpoints are saved, in seconds.')
flags.DEFINE_integer('save_summaries_secs', 30, 'The frequency at which '
                     'summaries are saved, in seconds.')
flags.DEFINE_boolean('summarize_gradients', False,
                     'Whether to generate gradient summaries.')

FLAGS = flags.FLAGS

TrainOps = namedtuple('TrainOps',
                      ['train_op', 'meta_train_op', 'collect_experience_op'])
class TrainStep(object):
  """Handles training step."""

  def __init__(self,
               max_number_of_steps=0,
               num_updates_per_observation=1,
               num_collect_per_update=1,
               num_collect_per_meta_update=1,
               log_every_n_steps=1,
               policy_save_fn=None,
               save_policy_every_n_steps=0,
               should_stop_early=None):
    """Returns a function that is executed at each step of slim training.

    Args:
      max_number_of_steps: Optional maximum number of train steps to take.
      num_updates_per_observation: Number of updates per observation.
      num_collect_per_update: Number of environment steps collected per
        low-level update.
      num_collect_per_meta_update: Number of environment steps collected per
        meta-agent update.
      log_every_n_steps: The frequency, in terms of global steps, at which the
        loss and global step are logged.
      policy_save_fn: A tf.Saver().save function to save the policy.
      save_policy_every_n_steps: How frequently to save the policy.
      should_stop_early: Optional hook to report whether training should stop.
    Raises:
      ValueError: If policy_save_fn is not provided when
        save_policy_every_n_steps > 0.
    """
    if save_policy_every_n_steps and policy_save_fn is None:
      raise ValueError(
          'policy_save_fn is required when save_policy_every_n_steps > 0')
    self.max_number_of_steps = max_number_of_steps
    self.num_updates_per_observation = num_updates_per_observation
    self.num_collect_per_update = num_collect_per_update
    self.num_collect_per_meta_update = num_collect_per_meta_update
    self.log_every_n_steps = log_every_n_steps
    self.policy_save_fn = policy_save_fn
    self.save_policy_every_n_steps = save_policy_every_n_steps
    self.should_stop_early = should_stop_early
    self.last_global_step_val = 0
    self.train_op_fn = None
    self.collect_and_train_fn = None
    tf.logging.info('Training for %d max_number_of_steps',
                    self.max_number_of_steps)
  def train_step(self, sess, train_ops, global_step, _):
    """This function will be called at each step of training.

    This represents one step of the DDPG algorithm and can include:
      1. collect a <state, action, reward, next_state> transition
      2. update the target network
      3. train the actor
      4. train the critic

    Args:
      sess: A Tensorflow session.
      train_ops: A TrainOps tuple of train ops to run.
      global_step: The global step.

    Returns:
      A scalar total loss.
      A boolean should stop.
    """
    start_time = time.time()
    if self.train_op_fn is None:
      self.train_op_fn = sess.make_callable(
          [train_ops.train_op, global_step])
      self.meta_train_op_fn = sess.make_callable(
          [train_ops.meta_train_op, global_step])
      self.collect_fn = sess.make_callable(
          [train_ops.collect_experience_op, global_step])
      self.collect_and_train_fn = sess.make_callable(
          [train_ops.train_op, global_step, train_ops.collect_experience_op])
      self.collect_and_meta_train_fn = sess.make_callable(
          [train_ops.meta_train_op, global_step,
           train_ops.collect_experience_op])
    for _ in range(self.num_collect_per_update - 1):
      self.collect_fn()
    for _ in range(self.num_updates_per_observation - 1):
      self.train_op_fn()

    total_loss, global_step_val, _ = self.collect_and_train_fn()
    if (global_step_val // self.num_collect_per_meta_update !=
        self.last_global_step_val // self.num_collect_per_meta_update):
      self.meta_train_op_fn()

    time_elapsed = time.time() - start_time
    should_stop = False
    if self.max_number_of_steps:
      should_stop = global_step_val >= self.max_number_of_steps
    if global_step_val != self.last_global_step_val:
      if (self.save_policy_every_n_steps and
          global_step_val // self.save_policy_every_n_steps !=
          self.last_global_step_val // self.save_policy_every_n_steps):
        self.policy_save_fn(sess)

      if (self.log_every_n_steps and
          global_step_val % self.log_every_n_steps == 0):
        tf.logging.info(
            'global step %d: loss = %.4f (%.3f sec/step) (%d steps/sec)',
            global_step_val, total_loss, time_elapsed, 1 / time_elapsed)

    self.last_global_step_val = global_step_val
    stop_early = bool(self.should_stop_early and self.should_stop_early())
    return total_loss, should_stop or stop_early
def create_counter_summaries(counters):
  """Add named summaries to counters, a list of tuples (name, counter)."""
  if counters:
    with tf.name_scope('Counters/'):
      for name, counter in counters:
        tf.summary.scalar(name, counter)


def gen_debug_batch_summaries(batch):
  """Generates summaries for the sampled replay batch."""
  states, actions, rewards, _, next_states = batch
  with tf.name_scope('batch'):
    for s in range(states.get_shape()[-1]):
      tf.summary.histogram('states_%d' % s, states[:, s])
    for s in range(states.get_shape()[-1]):
      tf.summary.histogram('next_states_%d' % s, next_states[:, s])
    for a in range(actions.get_shape()[-1]):
      tf.summary.histogram('actions_%d' % a, actions[:, a])
    tf.summary.histogram('rewards', rewards)
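
# --- Illustrative sketch (not part of the committed file) ---
# TrainStep.train_step() above triggers a meta-agent update whenever the
# global step crosses a multiple of `num_collect_per_meta_update`, by comparing
# integer divisions of the current and previous step counts. The same test in
# isolation:
def crossed_interval_sketch(step, last_step, every_n):
  """Returns True when `step` has entered a new `every_n`-sized bucket."""
  return step // every_n != last_step // every_n

# For example, with every_n=3: crossed_interval_sketch(6, 5, 3) is True,
# while crossed_interval_sketch(5, 4, 3) is False.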
research/efficient-hrl/utils/__init__.py
0 → 100644
View file @
65da497f
research/efficient-hrl/utils/eval_utils.py
0 → 100644
View file @
65da497f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluation utility functions.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time  # needed by the DataLossError retry sleep below

import numpy as np
import tensorflow as tf
from collections import namedtuple
logging = tf.logging

import gin.tf
@gin.configurable
def evaluate_checkpoint_repeatedly(checkpoint_dir,
                                   evaluate_checkpoint_fn,
                                   eval_interval_secs=600,
                                   max_number_of_evaluations=None,
                                   checkpoint_timeout=None,
                                   timeout_fn=None):
  """Evaluates a checkpointed model at a set interval."""
  if max_number_of_evaluations is not None and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')

  number_of_evaluations = 0
  for checkpoint_path in tf.contrib.training.checkpoints_iterator(
      checkpoint_dir,
      min_interval_secs=eval_interval_secs,
      timeout=checkpoint_timeout,
      timeout_fn=timeout_fn):
    retries = 3
    for _ in range(retries):
      try:
        should_stop = evaluate_checkpoint_fn(checkpoint_path)
        break
      except tf.errors.DataLossError as e:
        logging.warn(
            'Encountered a DataLossError while evaluating a checkpoint. This '
            'can happen when reading a checkpoint before it is fully written. '
            'Retrying...')
        time.sleep(2.0)
def compute_model_loss(sess, model_rollout_fn, states, actions):
  """Computes model loss."""
  preds, losses = [], []
  preds.append(states[0])
  losses.append(0)
  for state, action in zip(states[1:], actions[1:]):
    pred = model_rollout_fn(sess, preds[-1], action)
    loss = np.sqrt(np.sum((state - pred) ** 2))
    preds.append(pred)
    losses.append(loss)
  return preds, losses
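
# --- Illustrative sketch (not part of the committed file) ---
# compute_model_loss() rolls a learned model forward from the first observed
# state and reports the Euclidean error against each observed state. With a
# stub rollout function (a made-up additive dynamics model that ignores the
# session argument), it can be exercised without TensorFlow:
def _compute_model_loss_example():
  states = [np.array([0., 0.]), np.array([1., 1.]), np.array([2., 2.])]
  actions = [np.array([0., 0.]), np.array([1., 1.]), np.array([0.5, 0.5])]
  stub_rollout_fn = lambda sess, state, action: state + action
  preds, losses = compute_model_loss(None, stub_rollout_fn, states, actions)
  # losses[1:] are the per-step prediction errors; the last one is ~0.707.
  return preds, losses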
def compute_average_reward(sess, env_base, step_fn, gamma, num_steps,
                           num_episodes):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    env_base: A python environment.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, reward, discount, transition_type] values.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
    num_episodes: number of episodes to average the reward over.
  Returns:
    average_reward: a scalar of discounted reward.
    last_reward: last reward received.
  """
  average_reward = 0
  average_last_reward = 0
  average_meta_reward = 0
  average_last_meta_reward = 0
  average_success = 0.
  states, actions = None, None
  for i in range(num_episodes):
    env_base.end_episode()
    env_base.begin_episode()
    (reward, last_reward, meta_reward, last_meta_reward,
     states, actions) = compute_reward(
         sess, step_fn, gamma, num_steps)
    s_reward = last_meta_reward  # Navigation
    success = (s_reward > -5.0)  # When using diff=False
    logging.info('Episode = %d, reward = %s, meta_reward = %f, '
                 'last_reward = %s, last meta_reward = %f, success = %s',
                 i, reward, meta_reward, last_reward, last_meta_reward,
                 success)
    average_reward += reward
    average_last_reward += last_reward
    average_meta_reward += meta_reward
    average_last_meta_reward += last_meta_reward
    average_success += success
  average_reward /= num_episodes
  average_last_reward /= num_episodes
  average_meta_reward /= num_episodes
  average_last_meta_reward /= num_episodes
  average_success /= num_episodes
  return (average_reward, average_last_reward, average_meta_reward,
          average_last_meta_reward, average_success, states, actions)
def compute_reward(sess, step_fn, gamma, num_steps):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, reward, discount, transition_type] values.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
  Returns:
    reward: cumulative discounted reward.
    last_reward: reward received at final step.
  """
  total_reward = 0
  total_meta_reward = 0
  gamma_step = 1
  states = []
  actions = []
  for _ in range(num_steps):
    state, action, transition_type, reward, meta_reward, discount, _, _ = (
        step_fn(sess))
    total_reward += reward * gamma_step * discount
    total_meta_reward += meta_reward * gamma_step * discount
    gamma_step *= gamma
    states.append(state)
    actions.append(action)
  return (total_reward, reward, total_meta_reward, meta_reward, states,
          actions)
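
# --- Illustrative sketch (not part of the committed file) ---
# compute_reward() accumulates the discounted return exactly as below: each
# step's reward is weighted by the running gamma power and by the step's
# discount (which zeroes out terminal padding). A standalone numeric check:
def _discounted_return_example(rewards=(1.0, 1.0, 1.0), gamma=0.5):
  total, gamma_step = 0.0, 1.0
  for r in rewards:
    total += r * gamma_step * 1.0  # discount assumed to be 1.0 at every step
    gamma_step *= gamma
  return total  # 1 + 0.5 + 0.25 = 1.75 for the defaults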
research/efficient-hrl/utils/utils.py
0 → 100644
View file @
65da497f
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow utility functions.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from copy import deepcopy
import tensorflow as tf
from tf_agents import specs
from tf_agents.utils import common

_tf_print_counts = dict()
_tf_print_running_sums = dict()
_tf_print_running_counts = dict()
_tf_print_ids = 0
def get_contextual_env_base(env_base, begin_ops=None, end_ops=None):
  """Wrap env_base with additional tf ops."""
  # pylint: disable=protected-access
  def init(self_, env_base):
    self_._env_base = env_base
    attribute_list = ["_render_mode", "_gym_env"]
    for attribute in attribute_list:
      if hasattr(env_base, attribute):
        setattr(self_, attribute, getattr(env_base, attribute))
    if hasattr(env_base, "physics"):
      self_._physics = env_base.physics
    elif hasattr(env_base, "gym"):
      class Physics(object):
        def render(self, *args, **kwargs):
          return env_base.gym.render("rgb_array")
      physics = Physics()
      self_._physics = physics
      self_.physics = physics

  def set_sess(self_, sess):
    self_._sess = sess
    if hasattr(self_._env_base, "set_sess"):
      self_._env_base.set_sess(sess)

  def begin_episode(self_):
    self_._env_base.reset()
    if begin_ops is not None:
      self_._sess.run(begin_ops)

  def end_episode(self_):
    self_._env_base.reset()
    if end_ops is not None:
      self_._sess.run(end_ops)

  return type("ContextualEnvBase", (env_base.__class__,), dict(
      __init__=init,
      set_sess=set_sess,
      begin_episode=begin_episode,
      end_episode=end_episode,
  ))(env_base)
  # pylint: enable=protected-access
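
# --- Illustrative sketch (not part of the committed file) ---
# get_contextual_env_base() builds a subclass of the wrapped environment's own
# class at runtime via type(), overriding begin/end-of-episode hooks so extra
# TF ops can run around resets. The same dynamic-subclassing trick on a toy
# class, with hypothetical names:
def _dynamic_subclass_example():
  class Base(object):
    def reset(self):
      return 'base reset'

  def begin_episode(self_):
    return 'wrapped: ' + self_.reset()

  Wrapped = type('WrappedBase', (Base,), dict(begin_episode=begin_episode))
  return Wrapped().begin_episode()  # -> 'wrapped: base reset'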
def merge_specs(specs_):
  """Merge TensorSpecs.

  Args:
    specs_: List of TensorSpecs to be merged.
  Returns:
    a TensorSpec: a merged TensorSpec.
  """
  shape = specs_[0].shape
  dtype = specs_[0].dtype
  name = specs_[0].name
  for spec in specs_[1:]:
    assert shape[1:] == spec.shape[1:], "incompatible shapes: %s, %s" % (
        shape, spec.shape)
    assert dtype == spec.dtype, "incompatible dtypes: %s, %s" % (
        dtype, spec.dtype)
    shape = merge_shapes((shape, spec.shape), axis=0)
  return specs.TensorSpec(
      shape=shape,
      dtype=dtype,
      name=name,
  )
def merge_shapes(shapes, axis=0):
  """Merge TensorShapes.

  Args:
    shapes: List of TensorShapes to be merged.
    axis: optional, the axis along which to merge the shapes.
  Returns:
    a TensorShape: a merged TensorShape.
  """
  assert len(shapes) > 1
  dims = deepcopy(shapes[0].dims)
  for shape in shapes[1:]:
    assert shapes[0].ndims == shape.ndims
    dims[axis] += shape.dims[axis]
  return tf.TensorShape(dims=dims)
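
# --- Illustrative sketch (not part of the committed file) ---
# merge_shapes() concatenates shapes along one axis while all other dimensions
# must agree, and merge_specs() uses it to stack compatible specs. For example,
# merging [2, 3] and [5, 3] along axis 0 yields [7, 3]:
def _merge_shapes_example():
  merged = merge_shapes([tf.TensorShape([2, 3]), tf.TensorShape([5, 3])])
  return merged  # TensorShape([7, 3])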
def get_all_vars(ignore_scopes=None):
  """Get all tf variables in scope.

  Args:
    ignore_scopes: A list of scope names to ignore.
  Returns:
    A list of all tf variables in scope.
  """
  all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
  all_vars = [var for var in all_vars if ignore_scopes is None or not
              any(var.name.startswith(scope) for scope in ignore_scopes)]
  return all_vars


def clip(tensor, range_=None):
  """Return a tf op which clips tensor according to range_.

  Args:
    tensor: A Tensor to be clipped.
    range_: None, or a tuple representing (minval, maxval)
  Returns:
    A clipped Tensor.
  """
  if range_ is None:
    return tf.identity(tensor)
  elif isinstance(range_, (tuple, list)):
    assert len(range_) == 2
    return tf.clip_by_value(tensor, range_[0], range_[1])
  else:
    raise NotImplementedError("Unacceptable range input: %r" % range_)


def clip_to_bounds(value, minimum, maximum):
  """Clips value to be between minimum and maximum.

  Args:
    value: (tensor) value to be clipped.
    minimum: (numpy float array) minimum value to clip to.
    maximum: (numpy float array) maximum value to clip to.
  Returns:
    clipped_value: (tensor) `value` clipped to between `minimum` and `maximum`.
  """
  value = tf.minimum(value, maximum)
  return tf.maximum(value, minimum)


clip_to_spec = common.clip_to_spec
def _clip_to_spec(value, spec):
  """Clips value to a given bounded tensor spec.

  Args:
    value: (tensor) value to be clipped.
    spec: (BoundedTensorSpec) spec containing min. and max. values for clipping.
  Returns:
    clipped_value: (tensor) `value` clipped to be compatible with `spec`.
  """
  return clip_to_bounds(value, spec.minimum, spec.maximum)


join_scope = common.join_scope
def _join_scope(parent_scope, child_scope):
  """Joins a parent and child scope using `/`, checking for empty/none.

  Args:
    parent_scope: (string) parent/prefix scope.
    child_scope: (string) child/suffix scope.
  Returns:
    joined scope: (string) parent and child scopes joined by /.
  """
  if not parent_scope:
    return child_scope
  if not child_scope:
    return parent_scope
  return '/'.join([parent_scope, child_scope])


def assign_vars(vars_, values):
  """Returns the update ops for assigning a list of vars.

  Args:
    vars_: A list of variables.
    values: A list of tensors representing new values.
  Returns:
    A list of update ops for the variables.
  """
  return [var.assign(value) for var, value in zip(vars_, values)]


def identity_vars(vars_):
  """Return the identity ops for a list of tensors.

  Args:
    vars_: A list of tensors.
  Returns:
    A list of identity ops.
  """
  return [tf.identity(var) for var in vars_]


def tile(var, batch_size=1):
  """Return tiled tensor.

  Args:
    var: A tensor representing the state.
    batch_size: Batch size.
  Returns:
    A tensor with shape [batch_size,] + var.shape.
  """
  batch_var = tf.tile(
      tf.expand_dims(var, 0),
      (batch_size,) + (1,) * var.get_shape().ndims)
  return batch_var


def batch_list(vars_list):
  """Batch a list of variables.

  Args:
    vars_list: A list of tensor variables.
  Returns:
    A list of tensor variables with additional first dimension.
  """
  return [tf.expand_dims(var, 0) for var in vars_list]
def tf_print(op,
             tensors,
             message="",
             first_n=-1,
             name=None,
             sub_messages=None,
             print_freq=-1,
             include_count=True):
  """tf.Print, but to stdout."""
  # TODO(shanegu): `name` is deprecated. Remove from the rest of codes.
  global _tf_print_ids
  _tf_print_ids += 1
  name = _tf_print_ids
  _tf_print_counts[name] = 0
  if print_freq > 0:
    _tf_print_running_sums[name] = [0 for _ in tensors]
    _tf_print_running_counts[name] = 0

  def print_message(*xs):
    """print message fn."""
    _tf_print_counts[name] += 1
    if print_freq > 0:
      for i, x in enumerate(xs):
        _tf_print_running_sums[name][i] += x
      _tf_print_running_counts[name] += 1
    if (print_freq <= 0 or
        _tf_print_running_counts[name] >= print_freq) and (
            first_n < 0 or _tf_print_counts[name] <= first_n):
      for i, x in enumerate(xs):
        if print_freq > 0:
          del x
          x = _tf_print_running_sums[name][i] / _tf_print_running_counts[name]
        if sub_messages is None:
          sub_message = str(i)
        else:
          sub_message = sub_messages[i]
        log_message = "%s, %s" % (message, sub_message)
        if include_count:
          log_message += ", count=%d" % _tf_print_counts[name]
        tf.logging.info("[%s]: %s" % (log_message, x))
      if print_freq > 0:
        for i, x in enumerate(xs):
          _tf_print_running_sums[name][i] = 0
        _tf_print_running_counts[name] = 0
    return xs[0]

  print_op = tf.py_func(print_message, tensors, tensors[0].dtype)
  with tf.control_dependencies([print_op]):
    op = tf.identity(op)
  return op
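
# --- Illustrative sketch (not part of the committed file) ---
# tf_print() is what train.py attaches to the critic and actor train ops: the
# returned op is identical to the input op, but running it also logs a running
# average of the watched tensors every `print_freq` calls. A hypothetical
# usage, assuming some existing `loss_op` tensor:
#
#   loss_op = tf_print(loss_op, [loss_op],
#                      message='critic_loss', print_freq=1000)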
periodically = common.periodically
def _periodically(body, period, name='periodically'):
  """Periodically performs a tensorflow op."""
  if period is None or period == 0:
    return tf.no_op()

  if period < 0:
    raise ValueError("period cannot be less than 0.")

  if period == 1:
    return body()

  with tf.variable_scope(None, default_name=name):
    counter = tf.get_variable(
        "counter",
        shape=[],
        dtype=tf.int64,
        trainable=False,
        initializer=tf.constant_initializer(period, dtype=tf.int64))

    def _wrapped_body():
      with tf.control_dependencies([body()]):
        return counter.assign(1)

    update = tf.cond(
        tf.equal(counter, period), _wrapped_body,
        lambda: counter.assign_add(1))

  return update


soft_variables_update = common.soft_variables_update
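
# --- Illustrative sketch (not part of the committed file) ---
# periodically() (re-exported here from tf_agents' common utils) is what
# train.py uses for target-network updates: the wrapped op runs its body once
# every `period` calls and is just a counter increment otherwise. A
# hypothetical usage, assuming an agent exposing `update_targets()`:
#
#   update_targets_op = periodically(agent.update_targets,
#                                    period=10, name='update_targets')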
research/lstm_object_detection/README
View file @
65da497f
Tensorflow mobile video object detection implementation porposed in the following paper:
Tensorflow mobile video object detection implementation proposed in the following paper:
Mobile Video Object Detection with Temporally-Aware Feature Maps (CVPR 2018).
http://openaccess.thecvf.com/content_cvpr_2018/papers/Liu_Mobile_Video_Object_CVPR_2018_paper.pdf
...
...
research/lstm_object_detection/eval.py
View file @
65da497f
...
...
@@ -31,7 +31,7 @@ from google3.pyglib import app
from google3.pyglib import flags
from lstm_object_detection import evaluator
from lstm_object_detection import model_builder
from lstm_object_detection import seq_dataset_builder
from lstm_object_detection.inputs import seq_dataset_builder
from lstm_object_detection.utils import config_util
from object_detection.utils import label_map_util
...
...
research/lstm_object_detection/seq_dataset_builder.py → research/lstm_object_detection/inputs/seq_dataset_builder.py
View file @
65da497f
...
...
@@ -25,7 +25,7 @@ that wraps the build function.
import tensorflow as tf
import tensorflow.google as google_tf
from tensorflow.contrib.training.python.training import sequence_queueing_state_saver as sqss
from lstm_object_detection import tf_sequence_example_decoder
from lstm_object_detection.inputs import tf_sequence_example_decoder
from lstm_object_detection.protos import input_reader_google_pb2
from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
...
...
research/lstm_object_detection/seq_dataset_builder_test.py → research/lstm_object_detection/inputs/seq_dataset_builder_test.py
View file @
65da497f
...
...
@@ -23,7 +23,7 @@ from google.protobuf import text_format
from google3.testing.pybase import parameterized
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from lstm_object_detection import seq_dataset_builder
from lstm_object_detection.inputs import seq_dataset_builder
from lstm_object_detection.protos import pipeline_pb2 as internal_pipeline_pb2
from object_detection.builders import preprocessor_builder
from object_detection.core import standard_fields as fields
...
...
research/lstm_object_detection/tf_sequence_example_decoder.py → research/lstm_object_detection/inputs/tf_sequence_example_decoder.py
View file @
65da497f
File moved