Commit a00f7e2b authored by Dan Abolafia

Open source release of Brain Coder.

parent 54babf62
@@ -5,6 +5,7 @@
/research/attention_ocr/ @alexgorban
/research/audioset/ @plakal @dpwe
/research/autoencoders/ @snurkabill
/research/brain_coder/ @danabo
/research/cognitive_mapping_and_planning/ @s-gupta
/research/compression/ @nmjohn
/research/delf/ @andrefaraujo
...
@@ -20,6 +20,7 @@ installation](https://www.tensorflow.org/install).
- [audioset](audioset): Models and supporting code for use with
  [AudioSet](http://g.co/audioset).
- [autoencoder](autoencoder): various autoencoders.
- [brain_coder](brain_coder): Program synthesis with reinforcement learning.
- [cognitive_mapping_and_planning](cognitive_mapping_and_planning):
  implementation of a spatial memory based mapping and planning architecture
  for visual navigation.
...
# Brain Coder
*Authors: Daniel Abolafia, Quoc Le, Mohammad Norouzi*

Brain Coder is an experimental environment for program synthesis. We provide code that reproduces the results from our recent paper [Code Synthesis with Priority Queue Training](https://openreview.net/forum?id=r1AoGNlC-). See single_task/README.md for details on how to build and reproduce those experiments.
## Installation
First install dependencies separately:
* [bazel](https://docs.bazel.build/versions/master/install.html)
* [TensorFlow](https://www.tensorflow.org/install/)
* [scipy](https://www.scipy.org/install.html)
* [absl-py](https://github.com/abseil/abseil-py)
Note: even if you already have these dependencies installed, make sure they are
up-to-date to avoid unnecessary debugging.
## Building
Use bazel from the top-level repo directory.
For example:
```bash
bazel build single_task:run
```
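
After a successful build, Bazel places the runnable binary under `bazel-bin`. As a rough, hypothetical sketch (the real experiment flags are documented in single_task/README.md, so treat this invocation as illustrative only):

```bash
# Hypothetical smoke test of the built target; see single_task/README.md
# for the actual experiment flags.
./bazel-bin/single_task/run --help
```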
View README.md files in subdirectories for more details.
git_repository(
name = "subpar",
remote = "https://github.com/google/subpar",
tag = "1.0.0",
)
licenses(["notice"])
package(default_visibility = [
"//:__subpackages__",
])
py_library(
name = "bf",
srcs = ["bf.py"],
)
py_test(
name = "bf_test",
srcs = ["bf_test.py"],
deps = [
":bf",
# tensorflow dep
],
)
py_library(
name = "config_lib",
srcs = ["config_lib.py"],
)
py_test(
name = "config_lib_test",
srcs = ["config_lib_test.py"],
deps = [
":config_lib",
# tensorflow dep
],
)
py_library(
name = "reward",
srcs = ["reward.py"],
)
py_test(
name = "reward_test",
srcs = ["reward_test.py"],
deps = [
":reward",
# numpy dep
# tensorflow dep
],
)
py_library(
name = "rollout",
srcs = ["rollout.py"],
deps = [
":utils",
# numpy dep
# scipy dep
],
)
py_test(
name = "rollout_test",
srcs = ["rollout_test.py"],
deps = [
":rollout",
# numpy dep
# tensorflow dep
],
)
py_library(
name = "schedules",
srcs = ["schedules.py"],
deps = [":config_lib"],
)
py_test(
name = "schedules_test",
srcs = ["schedules_test.py"],
deps = [
":config_lib",
":schedules",
# numpy dep
# tensorflow dep
],
)
py_library(
name = "utils",
srcs = ["utils.py"],
deps = [
# file dep
# absl dep /logging
# numpy dep
# tensorflow dep
],
)
py_test(
name = "utils_test",
srcs = ["utils_test.py"],
deps = [
":utils",
# numpy dep
# tensorflow dep
],
)
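
Assuming this BUILD file defines the `common` package at the root of the Brain Coder workspace (the Python sources below import it as `from common import ...`), the unit tests declared above can be run with Bazel. For example:

```bash
# Illustrative only; target names come from the py_test rules above.
bazel test common:bf_test common:config_lib_test common:reward_test
```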
"""BrainF**k interpreter.

Language info: https://en.wikipedia.org/wiki/Brainfuck

Based on public implementation:
https://github.com/pocmo/Python-Brainfuck/blob/master/brainfuck.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import time
EvalResult = namedtuple(
'EvalResult', ['output', 'success', 'failure_reason', 'steps', 'time',
'memory', 'program_trace'])
ExecutionSnapshot = namedtuple(
'ExecutionSnapshot',
['codeptr', 'codechar', 'memptr', 'memval', 'memory', 'next_input',
'output_buffer'])
class Status(object):
SUCCESS = 'success'
TIMEOUT = 'timeout'
STEP_LIMIT = 'step-limit'
SYNTAX_ERROR = 'syntax-error'
CHARS = INT_TO_CHAR = ['>', '<', '+', '-', '[', ']', '.', ',']
CHAR_TO_INT = dict([(c, i) for i, c in enumerate(INT_TO_CHAR)])
class LookAheadIterator(object):
"""Same API as Python iterator, with additional peek method."""
def __init__(self, iterable):
self._it = iter(iterable)
self._current_element = None
self._done = False
self._preload_next()
def _preload_next(self):
try:
      self._current_element = next(self._it)
except StopIteration:
self._done = True
def next(self):
if self._done:
raise StopIteration
element = self._current_element
self._preload_next()
return element
def peek(self, default_value=None):
if self._done:
if default_value is None:
raise StopIteration
return default_value
return self._current_element
def buildbracemap(code):
"""Build jump map.
Args:
    code: List or string of BF chars.
Returns:
bracemap: dict mapping open and close brace positions in the code to their
destination jumps. Specifically, positions of matching open/close braces
if they exist.
correct_syntax: True if all braces match. False if there are unmatched
braces in the code. Even if there are unmatched braces, a bracemap will
be built, and unmatched braces will map to themselves.
"""
bracestack, bracemap = [], {}
correct_syntax = True
for position, command in enumerate(code):
if command == '[':
bracestack.append(position)
if command == ']':
if not bracestack: # Unmatched closing brace.
bracemap[position] = position # Don't jump to any position.
correct_syntax = False
continue
start = bracestack.pop()
bracemap[start] = position
bracemap[position] = start
if bracestack: # Unmatched opening braces.
for pos in bracestack:
bracemap[pos] = pos # Don't jump to any position.
correct_syntax = False
return bracemap, correct_syntax
def evaluate(code, input_buffer=None, init_memory=None, base=256, timeout=1.0,
max_steps=None, require_correct_syntax=True, output_memory=False,
debug=False):
"""Execute BF code.
Args:
code: String or list of BF characters. Any character not in CHARS will be
ignored.
input_buffer: A list of ints which will be used as the program's input
stream. Each read op "," will read an int from this list. 0's will be
read once the end of the list is reached, or if no input buffer is
given.
init_memory: A list of ints. Memory for first k positions will be
initialized to this list (where k = len(init_memory)). Memory positions
are initialized to 0 by default.
base: Integer base for the memory. When a memory value is incremented to
`base` it will overflow to 0. When a memory value is decremented to -1
it will underflow to `base` - 1.
timeout: Time limit for program execution in seconds. Set to None to
disable.
max_steps: Execution step limit. An execution step is the execution of one
operation (code character), even if that op has been executed before.
Execution exits when this many steps are reached. Set to None to
disable. Disabled by default.
require_correct_syntax: If True, unmatched braces will cause `evaluate` to
return without executing the code. The failure reason will be
`Status.SYNTAX_ERROR`. If False, unmatched braces are ignored
and execution will continue.
output_memory: If True, the state of the memory at the end of execution is
returned.
debug: If True, then a full program trace will be returned.
Returns:
EvalResult namedtuple containing
output: List of ints which were written out by the program with the "."
operation.
success: Boolean. Whether execution completed successfully.
failure_reason: One of the attributes of `Status`. Gives extra info
about why execution was not successful.
steps: Number of execution steps the program ran for.
time: Amount of time in seconds the program ran for.
memory: If `output_memory` is True, a list of memory cells up to the last
one written to. otherwise, None.
"""
input_iter = (
LookAheadIterator(input_buffer) if input_buffer is not None
else LookAheadIterator([]))
# Null memory value. This is the value of an empty memory. Also the value
# returned by the read operation when the input buffer is empty, or the
# end of the buffer is reached.
null_value = 0
code = list(code)
bracemap, correct_syntax = buildbracemap(code) # will modify code list
if require_correct_syntax and not correct_syntax:
return EvalResult([], False, Status.SYNTAX_ERROR, 0, 0.0,
[] if output_memory else None, [] if debug else None)
output_buffer = []
codeptr, cellptr = 0, 0
cells = list(init_memory) if init_memory else [0]
program_trace = [] if debug else None
success = True
reason = Status.SUCCESS
start_time = time.time()
steps = 0
while codeptr < len(code):
command = code[codeptr]
if debug:
# Add step to program trace.
program_trace.append(ExecutionSnapshot(
codeptr=codeptr, codechar=command, memptr=cellptr,
memval=cells[cellptr], memory=list(cells),
next_input=input_iter.peek(null_value),
output_buffer=list(output_buffer)))
if command == '>':
cellptr += 1
if cellptr == len(cells): cells.append(null_value)
if command == '<':
cellptr = 0 if cellptr <= 0 else cellptr - 1
if command == '+':
cells[cellptr] = cells[cellptr] + 1 if cells[cellptr] < (base - 1) else 0
if command == '-':
cells[cellptr] = cells[cellptr] - 1 if cells[cellptr] > 0 else (base - 1)
if command == '[' and cells[cellptr] == 0: codeptr = bracemap[codeptr]
if command == ']' and cells[cellptr] != 0: codeptr = bracemap[codeptr]
if command == '.': output_buffer.append(cells[cellptr])
if command == ',': cells[cellptr] = next(input_iter, null_value)
codeptr += 1
steps += 1
if timeout is not None and time.time() - start_time > timeout:
success = False
reason = Status.TIMEOUT
break
if max_steps is not None and steps >= max_steps:
success = False
reason = Status.STEP_LIMIT
break
if debug:
# Add step to program trace.
command = code[codeptr] if codeptr < len(code) else ''
program_trace.append(ExecutionSnapshot(
codeptr=codeptr, codechar=command, memptr=cellptr,
memval=cells[cellptr], memory=list(cells),
next_input=input_iter.peek(null_value),
output_buffer=list(output_buffer)))
return EvalResult(
output=output_buffer,
success=success,
failure_reason=reason,
steps=steps,
time=time.time() - start_time,
memory=cells if output_memory else None,
program_trace=program_trace)
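
Below is a minimal usage sketch for the interpreter defined above (not part of the original file). The program `'+++.'` increments the first memory cell three times and writes it out; the expected values mirror the tests that follow.

```python
from common import bf  # brain coder

result = bf.evaluate('+++.', input_buffer=[], max_steps=1000)
print(result.output)          # [3]
print(result.success)         # True
print(result.failure_reason)  # 'success', i.e. bf.Status.SUCCESS
```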
"""Tests for common.bf."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from common import bf  # brain coder

class BfTest(tf.test.TestCase):
def assertCorrectOutput(self, target_output, eval_result):
self.assertEqual(target_output, eval_result.output)
self.assertTrue(eval_result.success)
self.assertEqual(bf.Status.SUCCESS, eval_result.failure_reason)
def testBasicOps(self):
self.assertCorrectOutput(
[3, 1, 2],
bf.evaluate('+++.--.+.'))
self.assertCorrectOutput(
[1, 1, 2],
bf.evaluate('+.<.>++.'))
self.assertCorrectOutput(
[0],
bf.evaluate('+,.'))
self.assertCorrectOutput(
[ord(char) for char in 'Hello World!\n'],
bf.evaluate(
'>++++++++[-<+++++++++>]<.>>+>-[+]++>++>+++[>[->+++<<+++>]<<]>-----'
'.>->+++..+++.>-.<<+[>[+>+]>>]<--------------.>>.+++.------.-------'
'-.>+.>+.'))
def testBase(self):
self.assertCorrectOutput(
[1, 4],
bf.evaluate('+.--.', base=5, input_buffer=[]))
def testInputBuffer(self):
self.assertCorrectOutput(
[2, 3, 4],
bf.evaluate('>,[>,]<[.<]', input_buffer=[4, 3, 2]))
def testBadChars(self):
self.assertCorrectOutput(
[2, 3, 4],
bf.evaluate('>,[>,]hello<world[.<]comments',
input_buffer=[4, 3, 2]))
def testUnmatchedBraces(self):
self.assertCorrectOutput(
[3, 6, 1],
bf.evaluate('+++.]]]]>----.[[[[[>+.',
input_buffer=[],
base=10,
require_correct_syntax=False))
eval_result = bf.evaluate(
'+++.]]]]>----.[[[[[>+.',
input_buffer=[],
base=10,
require_correct_syntax=True)
self.assertEqual([], eval_result.output)
self.assertFalse(eval_result.success)
self.assertEqual(bf.Status.SYNTAX_ERROR,
eval_result.failure_reason)
def testTimeout(self):
er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=0.1)
self.assertEqual(
([1], False, bf.Status.TIMEOUT),
(er.output, er.success, er.failure_reason))
self.assertTrue(0.07 < er.time < 0.21)
er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=0.1)
self.assertEqual(
([1, 0], True, bf.Status.SUCCESS),
(er.output, er.success, er.failure_reason))
self.assertTrue(er.time < 0.15)
def testMaxSteps(self):
er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=None,
max_steps=100)
self.assertEqual(
([1], False, bf.Status.STEP_LIMIT, 100),
(er.output, er.success, er.failure_reason, er.steps))
er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=None,
max_steps=100)
self.assertEqual(
([1, 0], True, bf.Status.SUCCESS),
(er.output, er.success, er.failure_reason))
self.assertTrue(er.steps < 100)
def testOutputMemory(self):
er = bf.evaluate('+>++>+++>++++.', base=256, input_buffer=[],
output_memory=True)
self.assertEqual(
([4], True, bf.Status.SUCCESS),
(er.output, er.success, er.failure_reason))
self.assertEqual([1, 2, 3, 4], er.memory)
def testProgramTrace(self):
es = bf.ExecutionSnapshot
er = bf.evaluate(',[.>,].', base=256, input_buffer=[2, 1], debug=True)
self.assertEqual(
[es(codeptr=0, codechar=',', memptr=0, memval=0, memory=[0],
next_input=2, output_buffer=[]),
es(codeptr=1, codechar='[', memptr=0, memval=2, memory=[2],
next_input=1, output_buffer=[]),
es(codeptr=2, codechar='.', memptr=0, memval=2, memory=[2],
next_input=1, output_buffer=[]),
es(codeptr=3, codechar='>', memptr=0, memval=2, memory=[2],
next_input=1, output_buffer=[2]),
es(codeptr=4, codechar=',', memptr=1, memval=0, memory=[2, 0],
next_input=1, output_buffer=[2]),
es(codeptr=5, codechar=']', memptr=1, memval=1, memory=[2, 1],
next_input=0, output_buffer=[2]),
es(codeptr=2, codechar='.', memptr=1, memval=1, memory=[2, 1],
next_input=0, output_buffer=[2]),
es(codeptr=3, codechar='>', memptr=1, memval=1, memory=[2, 1],
next_input=0, output_buffer=[2, 1]),
es(codeptr=4, codechar=',', memptr=2, memval=0, memory=[2, 1, 0],
next_input=0, output_buffer=[2, 1]),
es(codeptr=5, codechar=']', memptr=2, memval=0, memory=[2, 1, 0],
next_input=0, output_buffer=[2, 1]),
es(codeptr=6, codechar='.', memptr=2, memval=0, memory=[2, 1, 0],
next_input=0, output_buffer=[2, 1]),
es(codeptr=7, codechar='', memptr=2, memval=0, memory=[2, 1, 0],
next_input=0, output_buffer=[2, 1, 0])],
er.program_trace)
if __name__ == '__main__':
tf.test.main()
"""Objects for storing configuration and passing config into binaries.

Config class stores settings and hyperparameters for models, data, and anything
else that may be specific to a particular run.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import ast
import itertools
class Config(dict):
"""Stores model configuration, hyperparameters, or dataset parameters."""
def __getattr__(self, attr):
return self[attr]
def __setattr__(self, attr, value):
self[attr] = value
def pretty_str(self, new_lines=True, indent=2, final_indent=0):
prefix = (' ' * indent) if new_lines else ''
final_prefix = (' ' * final_indent) if new_lines else ''
kv = ['%s%s=%s' % (prefix, k,
(repr(v) if not isinstance(v, Config)
else v.pretty_str(new_lines=new_lines,
indent=indent+2,
final_indent=indent)))
for k, v in self.items()]
if new_lines:
return 'Config(\n%s\n%s)' % (',\n'.join(kv), final_prefix)
else:
return 'Config(%s)' % ', '.join(kv)
def _update_iterator(self, *args, **kwargs):
"""Convert mixed input into an iterator over (key, value) tuples.
Follows the dict.update call signature.
Args:
*args: (Optional) Pass a dict or iterable of (key, value) 2-tuples as
an unnamed argument. Only one unnamed argument allowed.
**kwargs: (Optional) Pass (key, value) pairs as named arguments, where the
argument name is the key and the argument value is the value.
Returns:
An iterator over (key, value) tuples given in the input.
Raises:
TypeError: If more than one unnamed argument is given.
"""
if len(args) > 1:
      raise TypeError('Expected at most 1 unnamed argument, got %d'
% len(args))
obj = args[0] if args else dict()
if isinstance(obj, dict):
return itertools.chain(obj.items(), kwargs.items())
# Assume obj is an iterable of 2-tuples.
return itertools.chain(obj, kwargs.items())
def make_default(self, keys=None):
"""Convert OneOf objects into their default configs.
Recursively calls into Config objects.
Args:
keys: Iterable of key names to check. If None, all keys in self will be
used.
"""
if keys is None:
keys = self.keys()
for k in keys:
# Replace OneOf with its default value.
if isinstance(self[k], OneOf):
self[k] = self[k].default()
# Recursively call into all Config objects, even those that came from
# OneOf objects in the previous code line (for nested OneOf objects).
if isinstance(self[k], Config):
self[k].make_default()
def update(self, *args, **kwargs):
"""Same as dict.update except nested Config objects are updated.
Args:
*args: (Optional) Pass a dict or list of (key, value) 2-tuples as unnamed
argument.
**kwargs: (Optional) Pass (key, value) pairs as named arguments, where the
argument name is the key and the argument value is the value.
"""
key_set = set(self.keys())
for k, v in self._update_iterator(*args, **kwargs):
if k in key_set:
key_set.remove(k) # This key is updated so exclude from make_default.
if k in self and isinstance(self[k], Config) and isinstance(v, dict):
self[k].update(v)
elif k in self and isinstance(self[k], OneOf) and isinstance(v, dict):
# Replace OneOf with the chosen config.
self[k] = self[k].update(v)
else:
self[k] = v
self.make_default(key_set)
def strict_update(self, *args, **kwargs):
"""Same as Config.update except keys and types are not allowed to change.
If a given key is not already in this instance, an exception is raised. If a
given value does not have the same type as the existing value for the same
key, an exception is raised. Use this method to catch config mistakes.
Args:
*args: (Optional) Pass a dict or list of (key, value) 2-tuples as unnamed
argument.
**kwargs: (Optional) Pass (key, value) pairs as named arguments, where the
argument name is the key and the argument value is the value.
Raises:
TypeError: If more than one unnamed argument is given.
TypeError: If new value type does not match existing type.
KeyError: If a given key is not already defined in this instance.
"""
key_set = set(self.keys())
for k, v in self._update_iterator(*args, **kwargs):
if k in self:
key_set.remove(k) # This key is updated so exclude from make_default.
if isinstance(self[k], Config):
if not isinstance(v, dict):
raise TypeError('dict required for Config value, got %s' % type(v))
self[k].strict_update(v)
elif isinstance(self[k], OneOf):
if not isinstance(v, dict):
raise TypeError('dict required for OneOf value, got %s' % type(v))
# Replace OneOf with the chosen config.
self[k] = self[k].strict_update(v)
else:
if not isinstance(v, type(self[k])):
raise TypeError('Expecting type %s for key %s, got type %s'
% (type(self[k]), k, type(v)))
self[k] = v
else:
raise KeyError(
'Key %s does not exist. New key creation not allowed in '
'strict_update.' % k)
self.make_default(key_set)
@staticmethod
def from_str(config_str):
"""Inverse of Config.__str__."""
parsed = ast.literal_eval(config_str)
assert isinstance(parsed, dict)
def _make_config(dictionary):
for k, v in dictionary.items():
if isinstance(v, dict):
dictionary[k] = _make_config(v)
return Config(**dictionary)
return _make_config(parsed)
@staticmethod
def parse(key_val_string):
"""Parse hyperparameter string into Config object.
Format is 'key=val,key=val,...'
Values can be any python literal, or another Config object encoded as
'c(key=val,key=val,...)'.
c(...) expressions can be arbitrarily nested.
Example:
'a=1,b=3e-5,c=[1,2,3],d="hello world",e={"a":1,"b":2},f=c(x=1,y=[10,20])'
Args:
key_val_string: The hyperparameter string.
Returns:
Config object parsed from the input string.
"""
if not key_val_string.strip():
return Config()
def _pair_to_kv(pair):
split_index = pair.find('=')
key, val = pair[:split_index].strip(), pair[split_index+1:].strip()
if val.startswith('c(') and val.endswith(')'):
val = Config.parse(val[2:-1])
else:
val = ast.literal_eval(val)
return key, val
return Config(**dict([_pair_to_kv(pair)
for pair in _comma_iterator(key_val_string)]))
class OneOf(object):
"""Stores branching config.
In some cases there may be options which each have their own set of config
params. For example, if specifying config for an environment, each environment
can have custom config options. OneOf is a way to organize branching config.
Usage example:
one_of = OneOf(
[Config(a=1, b=2),
Config(a=2, c='hello'),
Config(a=3, d=10, e=-10)],
a=1)
config = one_of.strict_update(Config(a=3, d=20))
config == {'a': 3, 'd': 20, 'e': -10}
"""
def __init__(self, choices, **kwargs):
"""Constructor.
Usage: OneOf([Config(...), Config(...), ...], attribute=default_value)
Args:
choices: An iterable of Config objects. When update/strict_update is
called on this OneOf, one of these Config will be selected.
**kwargs: Give exactly one config attribute to branch on. The value of
this attribute during update/strict_update will determine which
Config is used.
Raises:
ValueError: If kwargs does not contain exactly one entry. Should give one
named argument which is used as the attribute to condition on.
"""
if len(kwargs) != 1:
raise ValueError(
'Incorrect usage. Must give exactly one named argument. The argument '
'name is the config attribute to condition on, and the argument '
'value is the default choice. Got %d named arguments.' % len(kwargs))
    key, default_value = list(kwargs.items())[0]
self.key = key
self.default_value = default_value
# Make sure each choice is a Config object.
for config in choices:
if not isinstance(config, Config):
raise TypeError('choices must be a list of Config objects. Got %s.'
% type(config))
# Map value for key to the config with that value.
self.value_map = {config[key]: config for config in choices}
self.default_config = self.value_map[self.default_value]
# Make sure there are no duplicate values.
if len(self.value_map) != len(choices):
raise ValueError('Multiple choices given for the same value of %s.' % key)
# Check that the default value is valid.
if self.default_value not in self.value_map:
raise ValueError(
'Default value is not an available choice. Got %s=%s. Choices are %s.'
% (key, self.default_value, self.value_map.keys()))
def default(self):
return self.default_config
def update(self, other):
"""Choose a config and update it.
If `other` is a Config, one of the config choices is selected and updated.
Otherwise `other` is returned.
Args:
other: Will update chosen config with this value by calling `update` on
the config.
Returns:
The chosen config after updating it, or `other` if no config could be
selected.
"""
if not isinstance(other, Config):
return other
if self.key not in other or other[self.key] not in self.value_map:
return other
target = self.value_map[other[self.key]]
target.update(other)
return target
def strict_update(self, config):
"""Choose a config and update it.
`config` must be a Config object. `config` must have the key used to select
among the config choices, and that key must have a value which one of the
config choices has.
Args:
      config: A Config object. The chosen config will be updated by calling
`strict_update`.
Returns:
The chosen config after updating it.
Raises:
TypeError: If `config` is not a Config instance.
ValueError: If `config` does not have the branching key in its key set.
ValueError: If the value of the config's branching key is not one of the
valid choices.
"""
if not isinstance(config, Config):
raise TypeError('Expecting Config instance, got %s.' % type(config))
if self.key not in config:
raise ValueError(
'Branching key %s required but not found in %s' % (self.key, config))
if config[self.key] not in self.value_map:
raise ValueError(
'Value %s for key %s is not a possible choice. Choices are %s.'
% (config[self.key], self.key, self.value_map.keys()))
target = self.value_map[config[self.key]]
target.strict_update(config)
return target
def _next_comma(string, start_index):
"""Finds the position of the next comma not used in a literal collection."""
paren_count = 0
  for i in range(start_index, len(string)):
c = string[i]
if c == '(' or c == '[' or c == '{':
paren_count += 1
elif c == ')' or c == ']' or c == '}':
paren_count -= 1
if paren_count == 0 and c == ',':
return i
return -1
def _comma_iterator(string):
index = 0
while 1:
next_index = _next_comma(string, index)
if next_index == -1:
yield string[index:]
return
yield string[index:next_index]
index = next_index + 1
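
A short sketch of how `Config`, `Config.parse`, and `OneOf` fit together (values here are illustrative; the behavior mirrors the tests that follow). The `OneOf` branch is selected by the `task` key supplied in the override string.

```python
from common import config_lib  # brain coder

config = config_lib.Config(
    model=config_lib.Config(stuff=1),
    data=config_lib.OneOf(
        [config_lib.Config(task=1, a='hello'),
         config_lib.Config(task=2, a='world', b='stuff')],
        task=1))

# Command-line style override; 'task=2' picks the second OneOf choice.
config.update(config_lib.Config.parse('model=c(stuff=2),data=c(task=2,a="hi")'))
assert config.data == {'task': 2, 'a': 'hi', 'b': 'stuff'}
assert config.model.stuff == 2
```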
"""Tests for common.config_lib."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from common import config_lib  # brain coder

class ConfigLibTest(tf.test.TestCase):
def testConfig(self):
config = config_lib.Config(hello='world', foo='bar', num=123, f=56.7)
self.assertEqual('world', config.hello)
self.assertEqual('bar', config['foo'])
config.hello = 'everyone'
config['bar'] = 9000
self.assertEqual('everyone', config['hello'])
self.assertEqual(9000, config.bar)
self.assertEqual(5, len(config))
def testConfigUpdate(self):
config = config_lib.Config(a=1, b=2, c=3)
config.update({'b': 10, 'd': 4})
self.assertEqual({'a': 1, 'b': 10, 'c': 3, 'd': 4}, config)
config = config_lib.Config(a=1, b=2, c=3)
config.update(b=10, d=4)
self.assertEqual({'a': 1, 'b': 10, 'c': 3, 'd': 4}, config)
config = config_lib.Config(a=1, b=2, c=3)
config.update({'e': 5}, b=10, d=4)
self.assertEqual({'a': 1, 'b': 10, 'c': 3, 'd': 4, 'e': 5}, config)
config = config_lib.Config(
a=1,
b=2,
x=config_lib.Config(
l='a',
y=config_lib.Config(m=1, n=2),
z=config_lib.Config(
q=config_lib.Config(a=10, b=20),
r=config_lib.Config(s=1, t=2))))
config.update(x={'y': {'m': 10}, 'z': {'r': {'s': 5}}})
self.assertEqual(
config_lib.Config(
a=1, b=2,
x=config_lib.Config(
l='a',
y=config_lib.Config(m=10, n=2),
z=config_lib.Config(
q=config_lib.Config(a=10, b=20),
r=config_lib.Config(s=5, t=2)))),
config)
config = config_lib.Config(
foo='bar',
num=100,
x=config_lib.Config(a=1, b=2, c=config_lib.Config(h=10, i=20, j=30)),
y=config_lib.Config(qrs=5, tuv=10),
d={'a': 1, 'b': 2},
l=[1, 2, 3])
config.update(
config_lib.Config(
foo='hat',
num=50.5,
x={'a': 5, 'z': -10},
y=config_lib.Config(wxyz=-1)),
d={'a': 10, 'c': 20},
l=[3, 4, 5, 6])
self.assertEqual(
config_lib.Config(
foo='hat',
num=50.5,
x=config_lib.Config(a=5, b=2, z=-10,
c=config_lib.Config(h=10, i=20, j=30)),
y=config_lib.Config(qrs=5, tuv=10, wxyz=-1),
d={'a': 10, 'c': 20},
l=[3, 4, 5, 6]),
config)
self.assertTrue(isinstance(config.x, config_lib.Config))
self.assertTrue(isinstance(config.x.c, config_lib.Config))
self.assertTrue(isinstance(config.y, config_lib.Config))
config = config_lib.Config(
foo='bar',
num=100,
x=config_lib.Config(a=1, b=2, c=config_lib.Config(h=10, i=20, j=30)),
y=config_lib.Config(qrs=5, tuv=10),
d={'a': 1, 'b': 2},
l=[1, 2, 3])
config.update(
config_lib.Config(
foo=1234,
num='hello',
x={'a': 5, 'z': -10, 'c': {'h': -5, 'k': 40}},
y=[1, 2, 3, 4],
d='stuff',
l={'a': 1, 'b': 2}))
self.assertEqual(
config_lib.Config(
foo=1234,
num='hello',
x=config_lib.Config(a=5, b=2, z=-10,
c=config_lib.Config(h=-5, i=20, j=30, k=40)),
y=[1, 2, 3, 4],
d='stuff',
l={'a': 1, 'b': 2}),
config)
self.assertTrue(isinstance(config.x, config_lib.Config))
self.assertTrue(isinstance(config.x.c, config_lib.Config))
self.assertTrue(isinstance(config.y, list))
def testConfigStrictUpdate(self):
config = config_lib.Config(a=1, b=2, c=3)
config.strict_update({'b': 10, 'c': 20})
self.assertEqual({'a': 1, 'b': 10, 'c': 20}, config)
config = config_lib.Config(a=1, b=2, c=3)
config.strict_update(b=10, c=20)
self.assertEqual({'a': 1, 'b': 10, 'c': 20}, config)
config = config_lib.Config(a=1, b=2, c=3, d=4)
config.strict_update({'d': 100}, b=10, a=20)
self.assertEqual({'a': 20, 'b': 10, 'c': 3, 'd': 100}, config)
config = config_lib.Config(
a=1,
b=2,
x=config_lib.Config(
l='a',
y=config_lib.Config(m=1, n=2),
z=config_lib.Config(
q=config_lib.Config(a=10, b=20),
r=config_lib.Config(s=1, t=2))))
config.strict_update(x={'y': {'m': 10}, 'z': {'r': {'s': 5}}})
self.assertEqual(
config_lib.Config(
a=1, b=2,
x=config_lib.Config(
l='a',
y=config_lib.Config(m=10, n=2),
z=config_lib.Config(
q=config_lib.Config(a=10, b=20),
r=config_lib.Config(s=5, t=2)))),
config)
config = config_lib.Config(
foo='bar',
num=100,
x=config_lib.Config(a=1, b=2, c=config_lib.Config(h=10, i=20, j=30)),
y=config_lib.Config(qrs=5, tuv=10),
d={'a': 1, 'b': 2},
l=[1, 2, 3])
config.strict_update(
config_lib.Config(
foo='hat',
num=50,
x={'a': 5, 'c': {'h': 100}},
y=config_lib.Config(tuv=-1)),
d={'a': 10, 'c': 20},
l=[3, 4, 5, 6])
self.assertEqual(
config_lib.Config(
foo='hat',
num=50,
x=config_lib.Config(a=5, b=2,
c=config_lib.Config(h=100, i=20, j=30)),
y=config_lib.Config(qrs=5, tuv=-1),
d={'a': 10, 'c': 20},
l=[3, 4, 5, 6]),
config)
def testConfigStrictUpdateFail(self):
config = config_lib.Config(a=1, b=2, c=3, x=config_lib.Config(a=1, b=2))
with self.assertRaises(KeyError):
config.strict_update({'b': 10, 'c': 20, 'd': 50})
with self.assertRaises(KeyError):
config.strict_update(b=10, d=50)
with self.assertRaises(KeyError):
config.strict_update(x={'c': 3})
with self.assertRaises(TypeError):
config.strict_update(a='string')
with self.assertRaises(TypeError):
config.strict_update(x={'a': 'string'})
with self.assertRaises(TypeError):
config.strict_update(x=[1, 2, 3])
def testConfigFromStr(self):
config = config_lib.Config.from_str("{'c': {'d': 5}, 'b': 2, 'a': 1}")
self.assertEqual(
{'c': {'d': 5}, 'b': 2, 'a': 1}, config)
self.assertTrue(isinstance(config, config_lib.Config))
self.assertTrue(isinstance(config.c, config_lib.Config))
def testConfigParse(self):
config = config_lib.Config.parse(
'hello="world",num=1234.5,lst=[10,20.5,True,"hi",("a","b","c")],'
'dct={9:10,"stuff":"qwerty","subdict":{1:True,2:False}},'
'subconfig=c(a=1,b=[1,2,[3,4]],c=c(f="f",g="g"))')
self.assertEqual(
{'hello': 'world', 'num': 1234.5,
'lst': [10, 20.5, True, 'hi', ('a', 'b', 'c')],
'dct': {9: 10, 'stuff': 'qwerty', 'subdict': {1: True, 2: False}},
'subconfig': {'a': 1, 'b': [1, 2, [3, 4]], 'c': {'f': 'f', 'g': 'g'}}},
config)
self.assertTrue(isinstance(config, config_lib.Config))
self.assertTrue(isinstance(config.subconfig, config_lib.Config))
self.assertTrue(isinstance(config.subconfig.c, config_lib.Config))
self.assertFalse(isinstance(config.dct, config_lib.Config))
self.assertFalse(isinstance(config.dct['subdict'], config_lib.Config))
self.assertTrue(isinstance(config.lst[4], tuple))
def testConfigParseErrors(self):
with self.assertRaises(SyntaxError):
config_lib.Config.parse('a=[1,2,b="hello"')
with self.assertRaises(SyntaxError):
config_lib.Config.parse('a=1,b=c(x="a",y="b"')
with self.assertRaises(SyntaxError):
config_lib.Config.parse('a=1,b=c(x="a")y="b"')
with self.assertRaises(SyntaxError):
config_lib.Config.parse('a=1,b=c(x="a"),y="b",')
def testOneOf(self):
def make_config():
return config_lib.Config(
data=config_lib.OneOf(
[config_lib.Config(task=1, a='hello'),
config_lib.Config(task=2, a='world', b='stuff'),
config_lib.Config(task=3, c=1234)],
task=2),
model=config_lib.Config(stuff=1))
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=1,a="hi")'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=1, a='hi'),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=2,a="hi")'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=2, a='hi', b='stuff'),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=3)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=3, c=1234),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=2, a='world', b='stuff'),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=4,d=9999)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=4, d=9999),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2),data=5'))
self.assertEqual(
config_lib.Config(
data=5,
model=config_lib.Config(stuff=2)),
config)
def testOneOfStrict(self):
def make_config():
return config_lib.Config(
data=config_lib.OneOf(
[config_lib.Config(task=1, a='hello'),
config_lib.Config(task=2, a='world', b='stuff'),
config_lib.Config(task=3, c=1234)],
task=2),
model=config_lib.Config(stuff=1))
config = make_config()
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=1,a="hi")'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=1, a='hi'),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=2,a="hi")'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=2, a='hi', b='stuff'),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=3)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=3, c=1234),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(task=2, a='world', b='stuff'),
model=config_lib.Config(stuff=2)),
config)
def testNestedOneOf(self):
def make_config():
return config_lib.Config(
data=config_lib.OneOf(
[config_lib.Config(task=1, a='hello'),
config_lib.Config(
task=2,
a=config_lib.OneOf(
[config_lib.Config(x=1, y=2),
config_lib.Config(x=-1, y=1000, z=4)],
x=1)),
config_lib.Config(task=3, c=1234)],
task=2),
model=config_lib.Config(stuff=1))
config = make_config()
config.update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=2,a=c(x=-1,z=8))'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(
task=2,
a=config_lib.Config(x=-1, y=1000, z=8)),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=2,a=c(x=-1,z=8))'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(
task=2,
a=config_lib.Config(x=-1, y=1000, z=8)),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.update(config_lib.Config.parse('model=c(stuff=2)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(
task=2,
a=config_lib.Config(x=1, y=2)),
model=config_lib.Config(stuff=2)),
config)
config = make_config()
config.strict_update(config_lib.Config.parse('model=c(stuff=2)'))
self.assertEqual(
config_lib.Config(
data=config_lib.Config(
task=2,
a=config_lib.Config(x=1, y=2)),
model=config_lib.Config(stuff=2)),
config)
def testOneOfStrictErrors(self):
def make_config():
return config_lib.Config(
data=config_lib.OneOf(
[config_lib.Config(task=1, a='hello'),
config_lib.Config(task=2, a='world', b='stuff'),
config_lib.Config(task=3, c=1234)],
task=2),
model=config_lib.Config(stuff=1))
config = make_config()
with self.assertRaises(TypeError):
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=[1,2,3]'))
config = make_config()
with self.assertRaises(KeyError):
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=3,c=5678,d=9999)'))
config = make_config()
with self.assertRaises(ValueError):
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=c(task=4,d=9999)'))
config = make_config()
with self.assertRaises(TypeError):
config.strict_update(config_lib.Config.parse(
'model=c(stuff=2),data=5'))
if __name__ == '__main__':
tf.test.main()
"""Reward functions, distance functions, and reward managers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from abc import ABCMeta
from abc import abstractmethod
from math import log
# All sequences here are assumed to be lists of ints bounded
# between 0 and `base`-1 (inclusive).
#################################
### Scalar Distance Functions ###
#################################
def abs_diff(a, b, base=0):
"""Absolute value of difference between scalars.
abs_diff is symmetric, i.e. `a` and `b` are interchangeable.
Args:
a: First argument. An int.
    b: Second argument. An int.
base: Dummy argument so that the argument signature matches other scalar
diff functions. abs_diff is the same in all bases.
Returns:
abs(a - b).
"""
del base # Unused.
return abs(a - b)
def mod_abs_diff(a, b, base):
"""Shortest distance between `a` and `b` in the modular integers base `base`.
The smallest distance between a and b is returned.
Example: mod_abs_diff(1, 99, 100) ==> 2. It is not 98.
mod_abs_diff is symmetric, i.e. `a` and `b` are interchangeable.
Args:
a: First argument. An int.
    b: Second argument. An int.
base: The modulo base. A positive int.
Returns:
Shortest distance.
"""
diff = abs(a - b)
if diff >= base:
diff %= base
return min(diff, (-diff) + base)
###############################
### List Distance Functions ###
###############################
def absolute_distance(pred, target, base, scalar_diff_fn=abs_diff):
"""Asymmetric list distance function.
List distance is the sum of element-wise distances, like Hamming distance, but
where `pred` can be longer or shorter than `target`. For each position in both
`pred` and `target`, distance between those elements is computed with
`scalar_diff_fn`. For missing or extra elements in `pred`, the maximum
distance is assigned, which is equal to `base`.
Distance is 0 when `pred` and `target` are identical, and will be a positive
integer when they are not.
Args:
pred: Prediction list. Distance from this list is computed.
target: Target list. Distance to this list is computed.
base: The integer base to use. For example, a list of chars would use base
256.
scalar_diff_fn: Element-wise distance function.
Returns:
List distance between `pred` and `target`.
"""
d = 0
for i, target_t in enumerate(target):
if i >= len(pred):
d += base # A missing slot is worth the max distance.
else:
# Add element-wise distance for this slot.
d += scalar_diff_fn(pred[i], target_t, base)
if len(pred) > len(target):
# Each extra slot is worth the max distance.
d += (len(pred) - len(target)) * base
return d
def log_absolute_distance(pred, target, base):
"""Asymmetric list distance function that uses log distance.
A list distance which computes sum of element-wise distances, similar to
`absolute_distance`. Unlike `absolute_distance`, this scales the resulting
distance to be a float.
  Element-wise distances are log-scale. Distance between two lists changes
  relatively less for elements that are far apart, but changes a lot (goes to 0
  faster) when values get close together.
Args:
pred: List of ints. Computes distance from this list to the target.
target: List of ints. This is the "correct" list which the prediction list
is trying to match.
base: Integer base.
Returns:
Float distance normalized so that when `pred` is at most as long as `target`
the distance is between 0.0 and 1.0. Distance grows unboundedly large
as `pred` grows past `target` in length.
"""
if not target:
length_normalizer = 1.0
if not pred:
# Distance between [] and [] is 0.0 since they are equal.
return 0.0
else:
length_normalizer = float(len(target))
# max_dist is the maximum element-wise distance, before taking log and
# scaling. Since we use `mod_abs_diff`, it would be (base // 2), but we add
# 1 to it so that missing or extra positions get the maximum penalty.
max_dist = base // 2 + 1
# The log-distance will be scaled by a factor.
# Note: +1 is added to the numerator and denominator to avoid log(0). This
# only has a translational effect, i.e. log(dist + 1) / log(max_dist + 1).
factor = log(max_dist + 1)
d = 0.0 # Total distance to be computed.
for i, target_t in enumerate(target):
if i >= len(pred):
# Assign the max element-wise distance for missing positions. This is 1.0
# after scaling.
d += 1.0
else:
# Add the log-dist divided by a scaling factor.
d += log(mod_abs_diff(pred[i], target_t, base) + 1) / factor
if len(pred) > len(target):
# Add the max element-wise distance for each extra position.
# Since max dist after scaling is 1, this is just the difference in list
# lengths.
d += (len(pred) - len(target))
return d / length_normalizer # Normalize again by the target length.
########################
### Reward Functions ###
########################
# Reward functions assign reward based on program output.
# Warning: only use these functions as the terminal rewards in episodes, i.e.
# for the "final" programs.
def absolute_distance_reward(pred, target, base, scalar_diff_fn=abs_diff):
"""Reward function based on absolute_distance function.
Maximum reward, 1.0, is given when the lists are equal. Reward is scaled
so that 0.0 reward is given when `pred` is the empty list (assuming `target`
is not empty). Reward can go negative when `pred` is longer than `target`.
This is an asymmetric reward function, so which list is the prediction and
which is the target matters.
Args:
pred: Prediction sequence. This should be the sequence outputted by the
generated code. List of ints n, where 0 <= n < base.
target: Target sequence. The correct sequence that the generated code needs
to output. List of ints n, where 0 <= n < base.
base: Base of the computation.
scalar_diff_fn: Element-wise distance function.
Returns:
Reward computed based on `pred` and `target`. A float.
"""
unit_dist = float(base * len(target))
if unit_dist == 0:
unit_dist = base
dist = absolute_distance(pred, target, base, scalar_diff_fn=scalar_diff_fn)
return (unit_dist - dist) / unit_dist
def absolute_mod_distance_reward(pred, target, base):
"""Same as `absolute_distance_reward` but `mod_abs_diff` scalar diff is used.
Args:
pred: Prediction sequence. This should be the sequence outputted by the
generated code. List of ints n, where 0 <= n < base.
target: Target sequence. The correct sequence that the generated code needs
to output. List of ints n, where 0 <= n < base.
base: Base of the computation.
Returns:
Reward computed based on `pred` and `target`. A float.
"""
return absolute_distance_reward(pred, target, base, mod_abs_diff)
def absolute_log_distance_reward(pred, target, base):
"""Compute reward using `log_absolute_distance`.
Maximum reward, 1.0, is given when the lists are equal. Reward is scaled
so that 0.0 reward is given when `pred` is the empty list (assuming `target`
is not empty). Reward can go negative when `pred` is longer than `target`.
This is an asymmetric reward function, so which list is the prediction and
which is the target matters.
This reward function has the nice property that much more reward is given
for getting the correct value (at each position) than for there being any
value at all. For example, in base 100, lets say pred = [1] * 1000
and target = [10] * 1000. A lot of reward would be given for being 80%
accurate (worst element-wise distance is 50, distances here are 9) using
`absolute_distance`. `log_absolute_distance` on the other hand will give
greater and greater reward increments the closer each predicted value gets to
  the target. That makes the reward given for accuracy somewhat independent of
the base.
Args:
pred: Prediction sequence. This should be the sequence outputted by the
generated code. List of ints n, where 0 <= n < base.
target: Target sequence. The correct sequence that the generated code needs
to output. List of ints n, where 0 <= n < base.
base: Base of the computation.
Returns:
Reward computed based on `pred` and `target`. A float.
"""
return 1.0 - log_absolute_distance(pred, target, base)
#######################
### Reward Managers ###
#######################
# Reward managers assign reward to many code attempts throughout an episode.
class RewardManager(object):
"""Reward managers administer reward across an episode.
Reward managers are used for "editor" environments. These are environments
where the agent has some way to edit its code over time, and run its code
  many times in the same episode, so that it can make incremental improvements.
Reward managers are instantiated with a target sequence, which is the known
correct program output. The manager is called on the output from a proposed
code, and returns reward. If many proposal outputs are tried, reward may be
some stateful function that takes previous tries into account. This is done,
in part, so that an agent cannot accumulate unbounded reward just by trying
junk programs as often as possible. So reward managers should not give the
same reward twice if the next proposal is not better than the last.
"""
__metaclass__ = ABCMeta
def __init__(self, target, base, distance_fn=absolute_distance):
self._target = list(target)
self._base = base
self._distance_fn = distance_fn
@abstractmethod
def __call__(self, sequence):
"""Call this reward manager like a function to get reward.
Calls to reward manager are stateful, and will take previous sequences
into account. Repeated calls with the same sequence may produce different
rewards.
Args:
sequence: List of integers (each between 0 and base - 1). This is the
proposal sequence. Reward will be computed based on the distance
from this sequence to the target (distance function and target are
given in the constructor), as well as previous sequences tried during
the lifetime of this object.
Returns:
Float value. The reward received from this call.
"""
return 0.0
class DeltaRewardManager(RewardManager):
"""Simple reward manager that assigns reward for the net change in distance.
Given some (possibly asymmetric) list distance function, gives reward for
relative changes in prediction distance to the target.
For example, if on the first call the distance is 3.0, the change in distance
is -3 (from starting distance of 0). That relative change will be scaled to
produce a negative reward for this step. On the next call, the distance is 2.0
which is a +1 change, and that will be scaled to give a positive reward.
If the final call has distance 0 (the target is achieved), that is another
positive change of +2. The total reward across all 3 calls is then 0, which is
  the highest possible episode total.
Reward is scaled so that the maximum element-wise distance is worth 1.0.
Maximum total episode reward attainable is 0.
"""
def __init__(self, target, base, distance_fn=absolute_distance):
super(DeltaRewardManager, self).__init__(target, base, distance_fn)
self._last_diff = 0
def _diff(self, seq):
return self._distance_fn(seq, self._target, self._base)
def _delta_reward(self, seq):
# Reward is relative to previous sequence diff.
# Reward is scaled so that maximum token difference is worth 1.0.
# Reward = (last_diff - this_diff) / self.base.
# Reward is positive if this sequence is closer to the target than the
# previous sequence, and negative if this sequence is further away.
diff = self._diff(seq)
reward = (self._last_diff - diff) / float(self._base)
self._last_diff = diff
return reward
def __call__(self, seq):
return self._delta_reward(seq)
class FloorRewardManager(RewardManager):
"""Assigns positive reward for each step taken closer to the target.
Given some (possibly asymmetric) list distance function, gives reward for
whenever a new episode minimum distance is reached. No reward is given if
the distance regresses to a higher value, so that the sum of rewards
for the episode is positive.
Reward is scaled so that the maximum element-wise distance is worth 1.0.
Maximum total episode reward attainable is len(target).
If the prediction sequence is longer than the target, a reward of -1 is given.
  Subsequent predictions which are also longer get 0 reward. The -1 penalty
will be canceled out with a +1 reward when a prediction is given which is at
most the length of the target.
"""
def __init__(self, target, base, distance_fn=absolute_distance):
super(FloorRewardManager, self).__init__(target, base, distance_fn)
self._last_diff = 0
self._min_diff = self._max_diff()
self._too_long_penality_given = False
def _max_diff(self):
return self._distance_fn([], self._target, self._base)
def _diff(self, seq):
return self._distance_fn(seq, self._target, self._base)
def _delta_reward(self, seq):
# Reward is only given if this sequence is closer to the target than any
# previous sequence.
# Reward is scaled so that maximum token difference is worth 1.0
# Reward = (min_diff - this_diff) / self.base
# Reward is always positive.
diff = self._diff(seq)
if diff < self._min_diff:
reward = (self._min_diff - diff) / float(self._base)
self._min_diff = diff
else:
reward = 0.0
return reward
def __call__(self, seq):
if len(seq) > len(self._target): # Output is too long.
if not self._too_long_penality_given:
self._too_long_penality_given = True
reward = -1.0
else:
reward = 0.0 # Don't give this penalty more than once.
return reward
reward = self._delta_reward(seq)
if self._too_long_penality_given:
reward += 1.0 # Return the subtracted reward.
self._too_long_penality_given = False
return reward
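
A small sketch contrasting the stateless reward functions with the stateful reward managers above (the numbers mirror the tests that follow):

```python
from common import reward  # brain coder

# Stateless reward for a single program output: 1.0 means an exact match.
print(reward.absolute_distance_reward([1, 2, 3], [1, 2, 3], base=5))  # 1.0

# Stateful manager: reward is only given when a new best distance is reached.
manager = reward.FloorRewardManager([1, 2, 3, 4], base=5)
print(manager([1]))           # 1.0  (first improvement over the empty output)
print(manager([1]))           # 0.0  (no new best distance, no reward)
print(manager([1, 2, 3, 4]))  # 3.0  (remaining distance closed, scaled by base)
```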
"""Tests for common.reward."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from math import log

import numpy as np
import tensorflow as tf

from common import reward  # brain coder

class RewardTest(tf.test.TestCase):
def testAbsDiff(self):
self.assertEqual(5, reward.abs_diff(15, 20))
self.assertEqual(5, reward.abs_diff(20, 15))
def testModAbsDiff(self):
self.assertEqual(5, reward.mod_abs_diff(15, 20, 25))
self.assertEqual(5, reward.mod_abs_diff(20, 15, 25))
self.assertEqual(2, reward.mod_abs_diff(1, 24, 25))
self.assertEqual(2, reward.mod_abs_diff(24, 1, 25))
self.assertEqual(0, reward.mod_abs_diff(0, 0, 5))
self.assertEqual(1, reward.mod_abs_diff(0, 1, 5))
self.assertEqual(2, reward.mod_abs_diff(0, 2, 5))
self.assertEqual(2, reward.mod_abs_diff(0, 3, 5))
self.assertEqual(1, reward.mod_abs_diff(0, 4, 5))
self.assertEqual(0, reward.mod_abs_diff(-1, 4, 5))
self.assertEqual(1, reward.mod_abs_diff(-5, 4, 5))
self.assertEqual(1, reward.mod_abs_diff(-7, 4, 5))
self.assertEqual(1, reward.mod_abs_diff(13, 4, 5))
self.assertEqual(1, reward.mod_abs_diff(15, 4, 5))
def testAbsoluteDistance_AbsDiffMethod(self):
self.assertEqual(
4,
reward.absolute_distance([0], [4], 5, scalar_diff_fn=reward.abs_diff))
self.assertEqual(
0,
reward.absolute_distance([4], [4], 5, scalar_diff_fn=reward.abs_diff))
self.assertEqual(
0,
reward.absolute_distance([], [], 5, scalar_diff_fn=reward.abs_diff))
self.assertEqual(
5,
reward.absolute_distance([1], [], 5, scalar_diff_fn=reward.abs_diff))
self.assertEqual(
5,
reward.absolute_distance([], [1], 5, scalar_diff_fn=reward.abs_diff))
self.assertEqual(
0,
reward.absolute_distance([1, 2, 3], [1, 2, 3], 5,
scalar_diff_fn=reward.abs_diff))
self.assertEqual(
1,
reward.absolute_distance([1, 2, 4], [1, 2, 3], 5,
scalar_diff_fn=reward.abs_diff))
self.assertEqual(
1,
reward.absolute_distance([1, 2, 2], [1, 2, 3], 5,
scalar_diff_fn=reward.abs_diff))
self.assertEqual(
5,
reward.absolute_distance([1, 2], [1, 2, 3], 5,
scalar_diff_fn=reward.abs_diff))
self.assertEqual(
5,
reward.absolute_distance([1, 2, 3, 4], [1, 2, 3], 5,
scalar_diff_fn=reward.abs_diff))
self.assertEqual(
6,
reward.absolute_distance([4, 4, 4], [1, 2, 3], 5,
scalar_diff_fn=reward.abs_diff))
def testAbsoluteDistance_ModDiffMethod(self):
self.assertEqual(
1,
reward.absolute_distance([0], [4], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
0,
reward.absolute_distance([4], [4], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
0,
reward.absolute_distance([], [], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
5,
reward.absolute_distance([1], [], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
5,
reward.absolute_distance([], [1], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
0,
reward.absolute_distance([1, 2, 3], [1, 2, 3], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
1,
reward.absolute_distance([1, 2, 4], [1, 2, 3], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
1,
reward.absolute_distance([1, 2, 2], [1, 2, 3], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
5,
reward.absolute_distance([1, 2], [1, 2, 3], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
5,
reward.absolute_distance([1, 2, 3, 4], [1, 2, 3], 5,
scalar_diff_fn=reward.mod_abs_diff))
self.assertEqual(
5,
reward.absolute_distance([4, 4, 4], [1, 2, 3], 5,
scalar_diff_fn=reward.mod_abs_diff))
def testLogAbsoluteDistance(self):
def log_diff(diff, base):
return log(diff + 1) / log(base // 2 + 2)
self.assertEqual(
log_diff(1, 5),
reward.log_absolute_distance([0], [4], 5))
self.assertEqual(
log_diff(2, 5),
reward.log_absolute_distance([1], [4], 5))
self.assertEqual(
log_diff(2, 5),
reward.log_absolute_distance([2], [4], 5))
self.assertEqual(
log_diff(1, 5),
reward.log_absolute_distance([3], [4], 5))
self.assertEqual(
log_diff(3, 5), # max_dist = base // 2 + 1 = 3
reward.log_absolute_distance([], [4], 5))
self.assertEqual(
0 + log_diff(3, 5), # max_dist = base // 2 + 1 = 3
reward.log_absolute_distance([4, 4], [4], 5))
self.assertEqual(
0,
reward.log_absolute_distance([4], [4], 5))
self.assertEqual(
0,
reward.log_absolute_distance([], [], 5))
self.assertEqual(
1,
reward.log_absolute_distance([1], [], 5))
self.assertEqual(
1,
reward.log_absolute_distance([], [1], 5))
self.assertEqual(
0,
reward.log_absolute_distance([1, 2, 3], [1, 2, 3], 5))
self.assertEqual(
log_diff(1, 5) / 3, # divided by target length.
reward.log_absolute_distance([1, 2, 4], [1, 2, 3], 5))
self.assertEqual(
log_diff(1, 5) / 3,
reward.log_absolute_distance([1, 2, 2], [1, 2, 3], 5))
self.assertEqual(
log_diff(3, 5) / 3, # max_dist
reward.log_absolute_distance([1, 2], [1, 2, 3], 5))
self.assertEqual(
log_diff(3, 5) / 3, # max_dist
reward.log_absolute_distance([1, 2, 3, 4], [1, 2, 3], 5))
# Add log differences for each position.
self.assertEqual(
(log_diff(2, 5) + log_diff(2, 5) + log_diff(1, 5)) / 3,
reward.log_absolute_distance([4, 4, 4], [1, 2, 3], 5))
def testAbsoluteDistanceReward(self):
self.assertEqual(
1,
reward.absolute_distance_reward([1, 2, 3], [1, 2, 3], 5))
self.assertEqual(
1 - 1 / (5 * 3.), # 1 - distance / (base * target_len)
reward.absolute_distance_reward([1, 2, 4], [1, 2, 3], 5))
self.assertEqual(
1 - 1 / (5 * 3.),
reward.absolute_distance_reward([1, 2, 2], [1, 2, 3], 5))
self.assertTrue(np.isclose(
1 - 5 / (5 * 3.),
reward.absolute_distance_reward([1, 2], [1, 2, 3], 5)))
self.assertTrue(np.isclose(
1 - 5 / (5 * 3.),
reward.absolute_distance_reward([1, 2, 3, 4], [1, 2, 3], 5)))
# Add log differences for each position.
self.assertEqual(
1 - (3 + 2 + 1) / (5 * 3.),
reward.absolute_distance_reward([4, 4, 4], [1, 2, 3], 5))
self.assertEqual(
1,
reward.absolute_distance_reward([], [], 5))
def testAbsoluteModDistanceReward(self):
self.assertEqual(
1,
reward.absolute_mod_distance_reward([1, 2, 3], [1, 2, 3], 5))
self.assertEqual(
1 - 1 / (5 * 3.), # 1 - distance / (base * target_len)
reward.absolute_mod_distance_reward([1, 2, 4], [1, 2, 3], 5))
self.assertEqual(
1 - 1 / (5 * 3.),
reward.absolute_mod_distance_reward([1, 2, 2], [1, 2, 3], 5))
self.assertTrue(np.isclose(
1 - 5 / (5 * 3.),
reward.absolute_mod_distance_reward([1, 2], [1, 2, 3], 5)))
self.assertTrue(np.isclose(
1 - 5 / (5 * 3.),
reward.absolute_mod_distance_reward([1, 2, 3, 4], [1, 2, 3], 5)))
# Add log differences for each position.
self.assertTrue(np.isclose(
1 - (2 + 2 + 1) / (5 * 3.),
reward.absolute_mod_distance_reward([4, 4, 4], [1, 2, 3], 5)))
self.assertTrue(np.isclose(
1 - (1 + 2 + 2) / (5 * 3.),
reward.absolute_mod_distance_reward([0, 1, 2], [4, 4, 4], 5)))
self.assertEqual(
1,
reward.absolute_mod_distance_reward([], [], 5))
def testAbsoluteLogDistanceReward(self):
def log_diff(diff, base):
return log(diff + 1) / log(base // 2 + 2)
self.assertEqual(
1,
reward.absolute_log_distance_reward([1, 2, 3], [1, 2, 3], 5))
self.assertEqual(
1 - log_diff(1, 5) / 3, # divided by target length.
reward.absolute_log_distance_reward([1, 2, 4], [1, 2, 3], 5))
self.assertEqual(
1 - log_diff(1, 5) / 3,
reward.absolute_log_distance_reward([1, 2, 2], [1, 2, 3], 5))
self.assertEqual(
1 - log_diff(3, 5) / 3, # max_dist
reward.absolute_log_distance_reward([1, 2], [1, 2, 3], 5))
self.assertEqual(
1 - log_diff(3, 5) / 3, # max_dist
reward.absolute_log_distance_reward([1, 2, 3, 4], [1, 2, 3], 5))
# Add log differences for each position.
self.assertEqual(
1 - (log_diff(2, 5) + log_diff(2, 5) + log_diff(1, 5)) / 3,
reward.absolute_log_distance_reward([4, 4, 4], [1, 2, 3], 5))
self.assertEqual(
1 - (log_diff(1, 5) + log_diff(2, 5) + log_diff(2, 5)) / 3,
reward.absolute_log_distance_reward([0, 1, 2], [4, 4, 4], 5))
self.assertEqual(
1,
reward.absolute_log_distance_reward([], [], 5))
def testDeltaRewardManager(self):
reward_manager = reward.DeltaRewardManager(
[1, 2, 3, 4], base=5, distance_fn=reward.absolute_distance)
self.assertEqual(-3, reward_manager([1]))
self.assertEqual(0, reward_manager([1]))
self.assertEqual(4 / 5., reward_manager([1, 3]))
self.assertEqual(-4 / 5, reward_manager([1]))
self.assertEqual(3, reward_manager([1, 2, 3, 4]))
self.assertEqual(-1, reward_manager([1, 2, 3]))
self.assertEqual(0, reward_manager([1, 2, 3, 4, 3]))
self.assertEqual(-1, reward_manager([1, 2, 3, 4, 3, 2]))
self.assertEqual(2, reward_manager([1, 2, 3, 4]))
self.assertEqual(0, reward_manager([1, 2, 3, 4]))
self.assertEqual(0, reward_manager([1, 2, 3, 4]))
def testFloorRewardManager(self):
reward_manager = reward.FloorRewardManager(
[1, 2, 3, 4], base=5, distance_fn=reward.absolute_distance)
self.assertEqual(1, reward_manager([1]))
self.assertEqual(0, reward_manager([1]))
self.assertEqual(4 / 5., reward_manager([1, 3]))
self.assertEqual(0, reward_manager([1]))
self.assertEqual(1 / 5., reward_manager([1, 2]))
self.assertEqual(0, reward_manager([0, 1]))
self.assertEqual(0, reward_manager([]))
self.assertEqual(0, reward_manager([1, 2]))
self.assertEqual(2, reward_manager([1, 2, 3, 4]))
self.assertEqual(0, reward_manager([1, 2, 3]))
self.assertEqual(-1, reward_manager([1, 2, 3, 4, 3]))
self.assertEqual(0, reward_manager([1, 2, 3, 4, 3, 2]))
self.assertEqual(1, reward_manager([1, 2, 3, 4]))
self.assertEqual(0, reward_manager([1, 2, 3, 4]))
self.assertEqual(0, reward_manager([1, 2, 3, 4]))
reward_manager = reward.FloorRewardManager(
[1, 2, 3, 4], base=5, distance_fn=reward.absolute_distance)
self.assertEqual(1, reward_manager([1]))
self.assertEqual(-1, reward_manager([1, 0, 0, 0, 0, 0]))
self.assertEqual(0, reward_manager([1, 2, 3, 4, 0, 0]))
self.assertEqual(0, reward_manager([1, 2, 3, 4, 0]))
self.assertEqual(1, reward_manager([]))
self.assertEqual(0, reward_manager([]))
self.assertEqual(0, reward_manager([1]))
self.assertEqual(1, reward_manager([1, 2]))
self.assertEqual(-1, reward_manager([1, 2, 3, 4, 0, 0]))
self.assertEqual(0, reward_manager([1, 1, 1, 1, 1]))
self.assertEqual(1 + 2, reward_manager([1, 2, 3, 4]))
if __name__ == '__main__':
tf.test.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Utilities related to computing training batches from episode rollouts.
Implementations here are based on code from OpenAI:
https://github.com/openai/universe-starter-agent/blob/master/a3c.py.
"""
from collections import namedtuple
import numpy as np
import scipy.signal
from common import utils # brain coder
class Rollout(object):
"""Holds a rollout for an episode.
A rollout is a record of the states observed in some environment and actions
taken by the agent to arrive at those states. Other information includes
rewards received after each action, values estimated for each state, whether
the rollout concluded the episode, and the total reward received. Everything
should be given in time order.
At each time t, the agent sees state s_t, takes action a_t, and then receives
reward r_t. The agent may optionally estimate a state value V(s_t) for each
state.
For an episode of length T:
states = [s_0, ..., s_(T-1)]
actions = [a_0, ..., a_(T-1)]
rewards = [r_0, ..., r_(T-1)]
values = [V(s_0), ..., V(s_(T-1))]
Note that there is an extra state s_T observed after taking action a_(T-1),
but this is not included in the rollout.
Rollouts have a `terminated` attribute which is True when the rollout is
"finalized", i.e. it holds a full episode. terminated will be False when
time steps are still being added to it.
"""
def __init__(self):
self.states = []
self.actions = []
self.rewards = []
self.values = []
self.total_reward = 0.0
self.terminated = False
def add(self, state, action, reward, value=0.0, terminated=False):
"""Add the next timestep to this rollout.
Args:
state: The state observed at the start of this timestep.
action: The action taken after observing the given state.
reward: The reward received for taking the given action.
value: The value estimated for the given state.
terminated: Whether this timestep ends the episode.
Raises:
ValueError: If self.terminated is already True, meaning that the episode
has already ended.
"""
if self.terminated:
raise ValueError(
'Trying to add timestep to an already terminal rollout.')
self.states += [state]
self.actions += [action]
self.rewards += [reward]
self.values += [value]
self.terminated = terminated
self.total_reward += reward
def add_many(self, states, actions, rewards, values=None, terminated=False):
"""Add many timesteps to this rollout.
Arguments are the same as `add`, but are lists of equal size.
Args:
states: The states observed.
actions: The actions taken.
rewards: The rewards received.
values: The values estimated for the given states.
terminated: Whether this sequence ends the episode.
Raises:
ValueError: If the lengths of all the input lists are not equal.
ValueError: If self.terminated is already True, meaning that the episode
has already ended.
"""
if len(states) != len(actions):
raise ValueError(
'Number of states and actions must be the same. Got %d states and '
'%d actions' % (len(states), len(actions)))
if len(states) != len(rewards):
raise ValueError(
'Number of states and rewards must be the same. Got %d states and '
'%d rewards' % (len(states), len(rewards)))
if values is not None and len(states) != len(values):
raise ValueError(
'Number of states and values must be the same. Got %d states and '
'%d values' % (len(states), len(values)))
if self.terminated:
raise ValueError(
'Trying to add timesteps to an already terminal rollout.')
self.states += states
self.actions += actions
self.rewards += rewards
self.values += values if values is not None else [0.0] * len(states)
self.terminated = terminated
self.total_reward += sum(rewards)
def extend(self, other):
"""Append another rollout to this rollout."""
assert not self.terminated
self.states.extend(other.states)
self.actions.extend(other.actions)
self.rewards.extend(other.rewards)
self.values.extend(other.values)
self.terminated = other.terminated
self.total_reward += other.total_reward
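# A minimal usage sketch of the Rollout class above (added for illustration;
# not part of the original module). It only uses the methods documented above:
#
#   ro = Rollout()
#   ro.add(state=0, action=1, reward=0.0, value=0.5)
#   ro.add(state=1, action=2, reward=1.0, value=0.25, terminated=True)
#   assert ro.terminated
#   assert ro.total_reward == 1.0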
def discount(x, gamma):
"""Returns discounted sums for each value in x, with discount factor gamma.
This can be used to compute the return (discounted sum of rewards) at each
timestep given a sequence of rewards. See the definitions for return and
REINFORCE in section 3 of https://arxiv.org/pdf/1602.01783.pdf.
Let g^k mean gamma ** k.
For list [x_0, ..., x_N], the following list of discounted sums is computed:
[x_0 + g^1 * x_1 + g^2 * x_2 + ... g^N * x_N,
x_1 + g^1 * x_2 + g^2 * x_3 + ... g^(N-1) * x_N,
x_2 + g^1 * x_3 + g^2 * x_4 + ... g^(N-2) * x_N,
...,
x_(N-1) + g^1 * x_N,
x_N]
Args:
x: List of numbers [x_0, ..., x_N].
gamma: Float between 0 and 1 (inclusive). This is the discount factor.
Returns:
List of discounted sums.
"""
return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]
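# Worked example for `discount` (illustrative, not from the original source):
# with gamma = 0.5 and x = [0.0, 1.0, 0.0, 0.0, 1.0], the discounted sums are
# [0.5625, 1.125, 0.25, 0.5, 1.0], since each entry is
# x_t + 0.5 * x_(t+1) + 0.25 * x_(t+2) + ...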
def discounted_advantage_and_rewards(rewards, values, gamma, lambda_=1.0):
"""Compute advantages and returns (discounted sum of rewards).
For an episode of length T, rewards = [r_0, ..., r_(T-1)].
Each reward r_t is observed after taking action a_t at state s_t. A final
state s_T is observed but no reward is given at this state since no action
a_T is taken (otherwise there would be a new state s_(T+1)).
`rewards` and `values` are for a single episode. Return R_t is the discounted
sum of future rewards starting at time t, where `gamma` is the discount
factor.
R_t = r_t + gamma * r_(t+1) + gamma**2 * r_(t+2) + ...
+ gamma**(T-1-t) * r_(T-1)
Advantage A(a_t, s_t) is approximated by computing A(a_t, s_t) = R_t - V(s_t)
where V(s_t) is an approximation of the value at that state, given in the
`values` list. Returns R_t are needed for all REINFORCE algorithms. Advantage
is used for the advantage actor critic variant of REINFORCE.
See algorithm S3 in https://arxiv.org/pdf/1602.01783.pdf.
Additionally another parameter `lambda_` controls the bias-variance tradeoff.
See "Generalized Advantage Estimation": https://arxiv.org/abs/1506.02438.
lambda_ = 1 reduces to regular advantage.
0 <= lambda_ < 1 trades off variance for bias, with lambda_ = 0 being the
most biased.
Bootstrapping is also supported. If an episode does not end in a terminal
state (either because the episode was ended early, or the environment does not
have end states), the true return cannot be computed from the rewards alone.
However, it can be estimated by computing the value (an approximation of
return) of the last state s_T. Thus the `values` list will have an extra item:
values = [V(s_0), ..., V(s_(T-1)), V(s_T)].
Args:
rewards: List of observed rewards [r_0, ..., r_(T-1)].
values: List of estimated values [V(s_0), ..., V(s_(T-1))] with an optional
extra V(s_T) item.
gamma: Discount factor. Number between 0 and 1. 1 means no discount.
If not 1, gamma is typically near 1, like 0.99.
lambda_: Bias-variance tradeoff factor. Between 0 and 1.
Returns:
empirical_values: Returns at each timestep.
generalized_advantage: Advantages at each timestep.
Raises:
ValueError: If shapes of `rewards` and `values` are not rank 1.
ValueError: If len(values) not in (len(rewards), len(rewards) + 1).
"""
rewards = np.asarray(rewards, dtype=np.float32)
values = np.asarray(values, dtype=np.float32)
if rewards.ndim != 1:
raise ValueError('Single episode only. rewards must be rank 1.')
if values.ndim != 1:
raise ValueError('Single episode only. values must be rank 1.')
if len(values) == len(rewards):
# No bootstrapping.
values = np.append(values, 0)
empirical_values = discount(rewards, gamma)
elif len(values) == len(rewards) + 1:
# With bootstrapping.
# Last value is for the terminal state (final state after last action was
# taken).
empirical_values = discount(np.append(rewards, values[-1]), gamma)[:-1]
else:
raise ValueError('values should contain the same number of items or one '
'more item than rewards')
delta = rewards + gamma * values[1:] - values[:-1]
generalized_advantage = discount(delta, gamma * lambda_)
# empirical_values is the discounted sum of rewards into the future.
# generalized_advantage is the target for each policy update.
return empirical_values, generalized_advantage
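# Worked example (illustrative, not from the original source): for
# rewards = [0, 0, 1], values = [0.5, 0.5, 0.5], gamma = 1 and lambda_ = 1
# (no bootstrapping), the returns are [1, 1, 1] and the advantages are the
# returns minus the value estimates, i.e. [0.5, 0.5, 0.5].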
"""Batch holds a minibatch of episodes.
Let bi = batch_index, i.e. the index of each episode in the minibatch.
Let t = time.
Attributes:
states: States for each timestep in each episode. Indexed by states[bi, t].
actions: Actions for each timestep in each episode. Indexed by actions[bi, t].
discounted_adv: Advantages (computed by discounted_advantage_and_rewards)
for each timestep in each episode. Indexed by discounted_adv[bi, t].
discounted_r: Returns (discounted sum of rewards computed by
discounted_advantage_and_rewards) for each timestep in each episode.
Indexed by discounted_r[bi, t].
total_rewards: Total reward for each episode, i.e. sum of rewards across all
timesteps (not discounted). Indexed by total_rewards[bi].
episode_lengths: Number of timesteps in each episode. If an episode has
N actions, N rewards, and N states, then its length is N. Indexed by
episode_lengths[bi].
batch_size: Number of episodes in this minibatch. An integer.
max_time: Maximum episode length in the batch. An integer.
""" # pylint: disable=pointless-string-statement
Batch = namedtuple(
'Batch',
['states', 'actions', 'discounted_adv', 'discounted_r', 'total_rewards',
'episode_lengths', 'batch_size', 'max_time'])
def process_rollouts(rollouts, gamma, lambda_=1.0):
"""Convert a batch of rollouts into tensors ready to be fed into a model.
Lists from each episode are stacked into 2D tensors and padded with 0s up to
the maximum timestep in the batch.
Args:
rollouts: A list of Rollout instances.
gamma: The discount factor. A number between 0 and 1 (inclusive). See gamma
argument in discounted_advantage_and_rewards.
lambda_: See lambda_ argument in discounted_advantage_and_rewards.
Returns:
Batch instance. states, actions, discounted_adv, and discounted_r are
numpy arrays with shape (batch_size, max_episode_length). episode_lengths
is a list of ints. total_rewards is a list of floats (total reward in each
episode). batch_size and max_time are ints.
Raises:
ValueError: If any of the rollouts are not terminal.
"""
for ro in rollouts:
if not ro.terminated:
raise ValueError('Can only process terminal rollouts.')
episode_lengths = [len(ro.states) for ro in rollouts]
batch_size = len(rollouts)
max_time = max(episode_lengths)
states = utils.stack_pad([ro.states for ro in rollouts], 0, max_time)
actions = utils.stack_pad([ro.actions for ro in rollouts], 0, max_time)
discounted_rewards = [None] * batch_size
discounted_adv = [None] * batch_size
for i, ro in enumerate(rollouts):
disc_r, disc_adv = discounted_advantage_and_rewards(
ro.rewards, ro.values, gamma, lambda_)
discounted_rewards[i] = disc_r
discounted_adv[i] = disc_adv
discounted_rewards = utils.stack_pad(discounted_rewards, 0, max_time)
discounted_adv = utils.stack_pad(discounted_adv, 0, max_time)
total_rewards = [sum(ro.rewards) for ro in rollouts]
return Batch(states=states,
actions=actions,
discounted_adv=discounted_adv,
discounted_r=discounted_rewards,
total_rewards=total_rewards,
episode_lengths=episode_lengths,
batch_size=batch_size,
max_time=max_time)
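# Usage sketch for `process_rollouts` (illustrative; assumes `ro1` and `ro2`
# are terminated Rollout instances built as shown above):
#
#   batch = process_rollouts([ro1, ro2], gamma=0.99)
#   # batch.states and batch.actions have shape (batch_size, max_time) and are
#   # padded with zeros past each episode's length.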
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Tests for common.rollout."""
import numpy as np
import tensorflow as tf
from common import rollout as rollout_lib # brain coder
class RolloutTest(tf.test.TestCase):
def MakeRollout(self, states, actions, rewards, values=None, terminated=True):
rollout = rollout_lib.Rollout()
rollout.add_many(
states=states, actions=actions, rewards=rewards, values=values,
terminated=terminated)
return rollout
def testDiscount(self):
discounted = np.array([1.0 / 2 ** n for n in range(4, -1, -1)])
discounted[:2] += [1.0 / 2 ** n for n in range(1, -1, -1)]
self.assertTrue(np.array_equal(
rollout_lib.discount([0.0, 1.0, 0.0, 0.0, 1.0], 0.50),
discounted))
self.assertTrue(np.array_equal(
rollout_lib.discount(np.array([0.0, 1.0, 0.0, 0.0, 1.0]), 0.50),
discounted))
def testDiscountedAdvantageAndRewards(self):
# lambda=1, No bootstrapping.
values = [0.1, 0.5, 0.5, 0.25]
(empirical_values,
generalized_advantage) = rollout_lib.discounted_advantage_and_rewards(
[0.0, 0.0, 0.0, 1.0],
values,
gamma=0.75,
lambda_=1.0)
expected_discounted_r = (
np.array([1.0 * 0.75 ** n for n in range(3, -1, -1)]))
expected_adv = expected_discounted_r - values
self.assertTrue(np.array_equal(empirical_values, expected_discounted_r))
self.assertTrue(np.allclose(generalized_advantage, expected_adv))
# lambda=1, With bootstrapping.
values = [0.1, 0.5, 0.5, 0.25, 0.75]
(empirical_values,
generalized_advantage) = rollout_lib.discounted_advantage_and_rewards(
[0.0, 0.0, 0.0, 1.0],
values,
gamma=0.75,
lambda_=1.0)
expected_discounted_r = (
np.array([0.75 * 0.75 ** n for n in range(4, 0, -1)])
+ np.array([1.0 * 0.75 ** n for n in range(3, -1, -1)]))
expected_adv = expected_discounted_r - values[:-1]
self.assertTrue(np.array_equal(empirical_values, expected_discounted_r))
self.assertTrue(np.allclose(generalized_advantage, expected_adv))
# lambda=0.5, With bootstrapping.
values = [0.1, 0.5, 0.5, 0.25, 0.75]
rewards = [0.0, 0.0, 0.0, 1.0]
l = 0.5 # lambda
g = 0.75 # gamma
(empirical_values,
generalized_advantage) = rollout_lib.discounted_advantage_and_rewards(
rewards,
values,
gamma=g,
lambda_=l)
expected_discounted_r = (
np.array([0.75 * g ** n for n in range(4, 0, -1)])
+ np.array([1.0 * g ** n for n in range(3, -1, -1)]))
expected_adv = [0.0] * len(values)
for t in range(3, -1, -1):
delta_t = rewards[t] + g * values[t + 1] - values[t]
expected_adv[t] = delta_t + g * l * expected_adv[t + 1]
expected_adv = expected_adv[:-1]
self.assertTrue(np.array_equal(empirical_values, expected_discounted_r))
self.assertTrue(np.allclose(generalized_advantage, expected_adv))
def testProcessRollouts(self):
g = 0.95
rollouts = [
self.MakeRollout(
states=[3, 6, 9],
actions=[1, 2, 3],
rewards=[1.0, -1.0, 0.5],
values=[0.5, 0.5, 0.1]),
self.MakeRollout(
states=[10],
actions=[5],
rewards=[1.0],
values=[0.5])]
batch = rollout_lib.process_rollouts(rollouts, gamma=g)
self.assertEqual(2, batch.batch_size)
self.assertEqual(3, batch.max_time)
self.assertEqual([3, 1], batch.episode_lengths)
self.assertEqual([0.5, 1.0], batch.total_rewards)
self.assertEqual(
[[3, 6, 9], [10, 0, 0]],
batch.states.tolist())
self.assertEqual(
[[1, 2, 3], [5, 0, 0]],
batch.actions.tolist())
rew1, rew2 = rollouts[0].rewards, rollouts[1].rewards
expected_discounted_rewards = [
[rew1[0] + g * rew1[1] + g * g * rew1[2],
rew1[1] + g * rew1[2],
rew1[2]],
[rew2[0], 0.0, 0.0]]
expected_advantages = [
[dr - v
for dr, v
in zip(expected_discounted_rewards[0], rollouts[0].values)],
[expected_discounted_rewards[1][0] - rollouts[1].values[0], 0.0, 0.0]]
self.assertTrue(
np.allclose(expected_discounted_rewards, batch.discounted_r))
self.assertTrue(
np.allclose(expected_advantages, batch.discounted_adv))
if __name__ == '__main__':
tf.test.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Schedule functions for controlling hparams over time."""
from abc import ABCMeta
from abc import abstractmethod
import math
from common import config_lib # brain coder
class Schedule(object):
"""Schedule is a function which sets a hyperparameter's value over time.
For example, a schedule can be used to decay an hparam, or oscillate it over
time.
This object is constructed with an instance of config_lib.Config (will be
specific to each class implementation). For example if this is a decay
schedule, the config may specify the rate of decay and decay start time. Then
the object instance is called like a function, mapping global step (an integer
counting how many calls to the train op have been made) to the hparam value.
Properties of a schedule function f(t):
0) Domain of t is the non-negative integers (t may be 0).
1) Range of f is the reals.
2) Schedule functions can assume that they will be called in time order. This
allows schedules to be stateful.
3) Schedule functions should be deterministic. Two schedule instances with the
same config must always give the same value for each t, regardless of
which t's they were previously called on. Users may call f(t) with arbitrary
(positive) time jumps. Essentially, multiple schedule instances used in
replica training will behave the same.
4) Duplicate successive calls at the same time step are allowed.
"""
__metaclass__ = ABCMeta
@abstractmethod
def __init__(self, config):
"""Construct this schedule with a config specific to each class impl.
Args:
config: An instance of config_lib.Config.
"""
pass
@abstractmethod
def __call__(self, global_step):
"""Map `global_step` to a value.
`global_step` is an integer counting how many calls to the train op have
been made across all replicas (hence why it is global). Implementations
may assume calls to be made in time order, i.e. `global_step` now >=
previous `global_step` values.
Args:
global_step: Non-negative integer.
Returns:
Hparam value at this step. A number.
"""
pass
class ConstSchedule(Schedule):
"""Constant function.
config:
const: Constant value at every step.
f(t) = const.
"""
def __init__(self, config):
super(ConstSchedule, self).__init__(config)
self.const = config.const
def __call__(self, global_step):
return self.const
class LinearDecaySchedule(Schedule):
"""Linear decay function.
config:
initial: Decay starts from this value.
final: Decay ends at this value.
start_time: Step when decay starts. Constant before it.
end_time: When decay ends. Constant after it.
f(t) is a linear function when start_time <= t <= end_time, with slope of
(final - initial) / (end_time - start_time). f(t) = initial
when t <= start_time. f(t) = final when t >= end_time.
If start_time == end_time, this becomes a step function.
"""
def __init__(self, config):
super(LinearDecaySchedule, self).__init__(config)
self.initial = config.initial
self.final = config.final
self.start_time = config.start_time
self.end_time = config.end_time
if self.end_time < self.start_time:
raise ValueError('start_time must be before end_time.')
# Linear interpolation.
self._time_diff = float(self.end_time - self.start_time)
self._diff = float(self.final - self.initial)
self._slope = (
self._diff / self._time_diff if self._time_diff > 0 else float('inf'))
def __call__(self, global_step):
if global_step <= self.start_time:
return self.initial
if global_step > self.end_time:
return self.final
return self.initial + (global_step - self.start_time) * self._slope
class ExponentialDecaySchedule(Schedule):
"""Exponential decay function.
See https://en.wikipedia.org/wiki/Exponential_decay.
Use this decay function to decay over orders of magnitude. For example, to
decay learning rate from 1e-2 to 1e-6. Exponential decay will decay the
exponent linearly.
config:
initial: Decay starts from this value.
final: Decay ends at this value.
start_time: Step when decay starts. Constant before it.
end_time: When decay ends. Constant after it.
f(t) is an exponential decay function when start_time <= t <= end_time. The
decay rate and amplitude are chosen so that f(t) = initial when
t = start_time, and f(t) = final when t = end_time. f(t) is constant for
t < start_time or t > end_time. initial and final must be positive values.
If start_time == end_time, this becomes a step function.
"""
def __init__(self, config):
super(ExponentialDecaySchedule, self).__init__(config)
self.initial = config.initial
self.final = config.final
self.start_time = config.start_time
self.end_time = config.end_time
if self.initial <= 0 or self.final <= 0:
raise ValueError('initial and final must be positive numbers.')
# Linear interpolation in log space.
self._linear_fn = LinearDecaySchedule(
config_lib.Config(
initial=math.log(self.initial),
final=math.log(self.final),
start_time=self.start_time,
end_time=self.end_time))
def __call__(self, global_step):
return math.exp(self._linear_fn(global_step))
class SmootherstepDecaySchedule(Schedule):
"""Smootherstep decay function.
A sigmoid-like transition from initial to final values; a smoother
transition than the linear and exponential decays, hence the name.
See https://en.wikipedia.org/wiki/Smoothstep.
config:
initial: Decay starts from this value.
final: Decay ends at this value.
start_time: Step when decay starts. Constant before it.
end_time: When decay ends. Constant after it.
f(t) is fully defined here:
https://en.wikipedia.org/wiki/Smoothstep#Variations.
f(t) is smooth, i.e. its first derivative exists everywhere.
"""
def __init__(self, config):
super(SmootherstepDecaySchedule, self).__init__(config)
self.initial = config.initial
self.final = config.final
self.start_time = config.start_time
self.end_time = config.end_time
if self.end_time < self.start_time:
raise ValueError('start_time must be before end_time.')
self._time_diff = float(self.end_time - self.start_time)
self._diff = float(self.final - self.initial)
def __call__(self, global_step):
if global_step <= self.start_time:
return self.initial
if global_step > self.end_time:
return self.final
x = (global_step - self.start_time) / self._time_diff
# Smootherstep
return self.initial + x * x * x * (x * (x * 6 - 15) + 10) * self._diff
class HardOscillatorSchedule(Schedule):
"""Hard oscillator function.
config:
high: Max value of the oscillator. Value at constant plateaus.
low: Min value of the oscillator. Value at constant valleys.
start_time: Global step when oscillation starts. Constant before this.
period: Width of one oscillation, i.e. number of steps over which the
oscillation takes place.
transition_fraction: Fraction of the period spent transitioning between high
and low values. 50% of this time is spent rising, and 50% of this time
is spent falling. 50% of the remaining time is spent constant at the
high value, and 50% of the remaining time is spent constant at the low
value. transition_fraction = 1.0 means the entire period is spent
rising and falling. transition_fraction = 0.0 means no time is spent
rising and falling, i.e. the function jumps instantaneously between
high and low.
f(t) = high when t < start_time.
f(t) is periodic when t >= start_time, with f(t + period) = f(t).
f(t) is linear with positive slope when rising, and negative slope when
falling. At the start of the period t0, f(t0) = high and begins to descend.
At the middle of the period f is low and is constant until the ascension
begins. f then rises from low to high and is constant again until the period
repeats.
Note: when transition_fraction is 0, f starts the period low and ends high.
"""
def __init__(self, config):
super(HardOscillatorSchedule, self).__init__(config)
self.high = config.high
self.low = config.low
self.start_time = config.start_time
self.period = float(config.period)
self.transition_fraction = config.transition_fraction
self.half_transition_fraction = config.transition_fraction / 2.0
if self.transition_fraction < 0 or self.transition_fraction > 1.0:
raise ValueError('transition_fraction must be between 0 and 1.0')
if self.period <= 0:
raise ValueError('period must be positive')
self._slope = (
float(self.high - self.low) / self.half_transition_fraction
if self.half_transition_fraction > 0 else float('inf'))
def __call__(self, global_step):
if global_step < self.start_time:
return self.high
period_pos = ((global_step - self.start_time) / self.period) % 1.0
if period_pos >= 0.5:
# ascending
period_pos -= 0.5
if period_pos < self.half_transition_fraction:
return self.low + period_pos * self._slope
else:
return self.high
else:
# descending
if period_pos < self.half_transition_fraction:
return self.high - period_pos * self._slope
else:
return self.low
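# Worked example (illustrative, mirroring the values checked in
# schedules_test.py): with high=2, low=0, start_time=100, period=10 and
# transition_fraction=0.5, f descends 2 -> 1.2 -> 0.4 over steps 100-102,
# reaches 0 at step 103 and stays there through step 105, rises 0.8 -> 1.6
# over steps 106-107, and is back at 2 from step 108 until the next period
# begins at step 110.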
_NAME_TO_CONFIG = {
'const': ConstSchedule,
'linear_decay': LinearDecaySchedule,
'exp_decay': ExponentialDecaySchedule,
'smooth_decay': SmootherstepDecaySchedule,
'hard_osc': HardOscillatorSchedule,
}
def make_schedule(config):
"""Schedule factory.
Given `config` containing a `fn` property, a Schedule implementation is
instantiated with `config`. See `_NAME_TO_CONFIG` for `fn` options.
Args:
config: Config with a `fn` option that specifies which Schedule
implementation to use. `config` is passed into the constructor.
Returns:
A Schedule impl instance.
"""
schedule_class = _NAME_TO_CONFIG[config.fn]
return schedule_class(config)
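# Usage sketch (illustrative, not part of the original module): building a
# linear decay schedule from a Config and evaluating it at a few global steps.
#
#   cfg = config_lib.Config(fn='linear_decay', initial=1.0, final=0.0,
#                           start_time=0, end_time=100)
#   f = make_schedule(cfg)
#   # f(0) == 1.0, f(50) == 0.5, f(200) == 0.0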
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Tests for common.schedules."""
from math import exp
from math import sqrt
import numpy as np
import tensorflow as tf
from common import config_lib # brain coder
from common import schedules # brain coder
class SchedulesTest(tf.test.TestCase):
def ScheduleTestHelper(self, config, schedule_subtype, io_values):
"""Run common checks for schedules.
Args:
config: Config object which is passed into schedules.make_schedule.
schedule_subtype: The expected schedule type to be instantiated.
io_values: List of (input, output) pairs. Must be in ascending input
order. No duplicate inputs.
"""
# Check that make_schedule makes the correct type.
f = schedules.make_schedule(config)
self.assertTrue(isinstance(f, schedule_subtype))
# Check that multiple instances returned from make_schedule behave the same.
fns = [schedules.make_schedule(config) for _ in xrange(3)]
# Check that all the inputs map to the right outputs.
for i, o in io_values:
for f in fns:
f_out = f(i)
self.assertTrue(
np.isclose(o, f_out),
'Wrong value at input %d. Expected %s, got %s' % (i, o, f_out))
# Check that a subset of the io_values are still correct.
f = schedules.make_schedule(config)
subseq = [io_values[i**2] for i in xrange(int(sqrt(len(io_values))))]
if subseq[-1] != io_values[-1]:
subseq.append(io_values[-1])
for i, o in subseq:
f_out = f(i)
self.assertTrue(
np.isclose(o, f_out),
'Wrong value at input %d. Expected %s, got %s' % (i, o, f_out))
# Check duplicate calls.
f = schedules.make_schedule(config)
for i, o in io_values:
for _ in xrange(3):
f_out = f(i)
self.assertTrue(
np.isclose(o, f_out),
'Duplicate calls at input %d are not equal. Expected %s, got %s'
% (i, o, f_out))
def testConstSchedule(self):
self.ScheduleTestHelper(
config_lib.Config(fn='const', const=5),
schedules.ConstSchedule,
[(0, 5), (1, 5), (10, 5), (20, 5), (100, 5), (1000000, 5)])
def testLinearDecaySchedule(self):
self.ScheduleTestHelper(
config_lib.Config(fn='linear_decay', initial=2, final=0, start_time=10,
end_time=20),
schedules.LinearDecaySchedule,
[(0, 2), (1, 2), (10, 2), (11, 1.8), (15, 1), (19, 0.2), (20, 0),
(100000, 0)])
# Test step function.
self.ScheduleTestHelper(
config_lib.Config(fn='linear_decay', initial=2, final=0, start_time=10,
end_time=10),
schedules.LinearDecaySchedule,
[(0, 2), (1, 2), (10, 2), (11, 0), (15, 0)])
def testExponentialDecaySchedule(self):
self.ScheduleTestHelper(
config_lib.Config(fn='exp_decay', initial=exp(-1), final=exp(-6),
start_time=10, end_time=20),
schedules.ExponentialDecaySchedule,
[(0, exp(-1)), (1, exp(-1)), (10, exp(-1)), (11, exp(-1/2. - 1)),
(15, exp(-5/2. - 1)), (19, exp(-9/2. - 1)), (20, exp(-6)),
(100000, exp(-6))])
# Test step function.
self.ScheduleTestHelper(
config_lib.Config(fn='exp_decay', initial=exp(-1), final=exp(-6),
start_time=10, end_time=10),
schedules.ExponentialDecaySchedule,
[(0, exp(-1)), (1, exp(-1)), (10, exp(-1)), (11, exp(-6)),
(15, exp(-6))])
def testSmootherstepDecaySchedule(self):
self.ScheduleTestHelper(
config_lib.Config(fn='smooth_decay', initial=2, final=0, start_time=10,
end_time=20),
schedules.SmootherstepDecaySchedule,
[(0, 2), (1, 2), (10, 2), (11, 1.98288), (15, 1), (19, 0.01712),
(20, 0), (100000, 0)])
# Test step function.
self.ScheduleTestHelper(
config_lib.Config(fn='smooth_decay', initial=2, final=0, start_time=10,
end_time=10),
schedules.SmootherstepDecaySchedule,
[(0, 2), (1, 2), (10, 2), (11, 0), (15, 0)])
def testHardOscillatorSchedule(self):
self.ScheduleTestHelper(
config_lib.Config(fn='hard_osc', high=2, low=0, start_time=100,
period=10, transition_fraction=0.5),
schedules.HardOscillatorSchedule,
[(0, 2), (1, 2), (10, 2), (100, 2), (101, 1.2), (102, 0.4), (103, 0),
(104, 0), (105, 0), (106, 0.8), (107, 1.6), (108, 2), (109, 2),
(110, 2), (111, 1.2), (112, 0.4), (115, 0), (116, 0.8), (119, 2),
(120, 2), (100001, 1.2), (100002, 0.4), (100005, 0), (100006, 0.8),
(100010, 2)])
# Test instantaneous step.
self.ScheduleTestHelper(
config_lib.Config(fn='hard_osc', high=2, low=0, start_time=100,
period=10, transition_fraction=0),
schedules.HardOscillatorSchedule,
[(0, 2), (1, 2), (10, 2), (99, 2), (100, 0), (104, 0), (105, 2),
(106, 2), (109, 2), (110, 0)])
if __name__ == '__main__':
tf.test.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Configuration class."""
import bisect
from collections import deque
import cPickle
import heapq
import random
from absl import logging
import numpy as np
import tensorflow as tf
def tuple_to_record(tuple_, record_type):
return record_type(**dict(zip(record_type.__slots__, tuple_)))
def make_record(type_name, attributes, defaults=None):
"""Factory for mutable record classes.
A record acts just like a collections.namedtuple except slots are writable.
One exception is that record classes are not equivalent to tuples or other
record classes of the same length.
Note, each call to `make_record` produces a unique type. Two calls will make
different types even if `type_name` is the same each time.
Args:
type_name: Name of the record type to create.
attributes: List of names of each record attribute. The order of the list
is preserved.
defaults: (optional) default values for attributes. A dict mapping attribute
names to values.
Returns:
A new record type.
Raises:
ValueError: If,
`defaults` is not a dict,
`attributes` contains duplicate names,
`defaults` keys are not contained in `attributes`.
"""
if defaults is None:
defaults = {}
if not isinstance(defaults, dict):
raise ValueError('defaults must be a dict.')
attr_set = set(attributes)
if len(attr_set) < len(attributes):
raise ValueError('No duplicate attributes allowed.')
if not set(defaults.keys()).issubset(attr_set):
raise ValueError('Default attributes must be given in the attributes list.')
class RecordClass(object):
"""A record type.
Acts like mutable tuple with named slots.
"""
__slots__ = list(attributes)
_defaults = dict(defaults)
def __init__(self, *args, **kwargs):
if len(args) > len(self.__slots__):
raise ValueError('Too many arguments. %s has length %d.'
% (type(self).__name__, len(self.__slots__)))
for attr, val in self._defaults.items():
setattr(self, attr, val)
for i, arg in enumerate(args):
setattr(self, self.__slots__[i], arg)
for attr, val in kwargs.items():
setattr(self, attr, val)
for attr in self.__slots__:
if not hasattr(self, attr):
raise ValueError('Required attr "%s" is not set.' % attr)
def __len__(self):
return len(self.__slots__)
def __iter__(self):
for attr in self.__slots__:
yield getattr(self, attr)
def __getitem__(self, index):
return getattr(self, self.__slots__[index])
def __setitem__(self, index, value):
return setattr(self, self.__slots__[index], value)
def __eq__(self, other):
# Types must be equal as well as values.
return (isinstance(other, type(self))
and all(a == b for a, b in zip(self, other)))
def __str__(self):
return '%s(%s)' % (
type(self).__name__,
', '.join(attr + '=' + str(getattr(self, attr))
for attr in self.__slots__))
def __repr__(self):
return str(self)
RecordClass.__name__ = type_name
return RecordClass
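# Example usage of `make_record` (illustrative, not part of the original
# module): a mutable record type with a default value.
#
#   Point = make_record('Point', ['x', 'y'], {'y': 0})
#   p = Point(x=3)
#   # p.x == 3, p.y == 0, and slots are writable: p.y = 5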
# Making minibatches.
def stack_pad(tensors, pad_axes=None, pad_to_lengths=None, dtype=np.float32,
pad_value=0):
"""Stack tensors along 0-th dim and pad them to be the same shape.
Args:
tensors: Any list of iterables (python list, numpy array, etc). Can be 1D
or multi-D iterables.
pad_axes: An int or list of ints. Axes to pad along.
pad_to_lengths: Length in each dimension. If pad_axes was an int, this is an
int or None. If pad_axes was a list of ints, this is a list of mixed int
and None types with the same length, or None. A None length means the
maximum length among the given tensors is used.
dtype: Type of output numpy array. Defaults to np.float32.
pad_value: Value to use for padding. Defaults to 0.
Returns:
Numpy array containing the tensors stacked along the 0-th dimension and
padded along the specified dimensions.
Raises:
ValueError: If the tensors do not have equal shapes along non-padded
dimensions.
"""
tensors = [np.asarray(t) for t in tensors]
max_lengths = [max(l) for l in zip(*[t.shape for t in tensors])]
same_axes = dict(enumerate(max_lengths))
if pad_axes is None:
pad_axes = []
if isinstance(pad_axes, (int, long)):
if pad_to_lengths is not None:
max_lengths[pad_axes] = pad_to_lengths
del same_axes[pad_axes]
else:
if pad_to_lengths is None:
pad_to_lengths = [None] * len(pad_axes)
for i, l in zip(pad_axes, pad_to_lengths):
if l is not None:
max_lengths[i] = l
del same_axes[i]
same_axes_items = same_axes.items()
dest = np.full([len(tensors)] + max_lengths, pad_value, dtype=dtype)
for i, t in enumerate(tensors):
for j, l in same_axes_items:
if t.shape[j] != l:
raise ValueError(
'Tensor at index %d does not have size %d along axis %d'
% (i, l, j))
dest[[i] + [slice(0, d) for d in t.shape]] = t
return dest
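# Example (illustrative): padding variable-length 1D sequences along axis 0.
#
#   stack_pad([[1, 2, 3], [4]], pad_axes=0)
#   # yields a float32 array equal to [[1, 2, 3], [4, 0, 0]].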
class RandomQueue(deque):
def __init__(self, capacity):
super(RandomQueue, self).__init__([], capacity)
self.capacity = capacity
def random_sample(self, sample_size):
idx = np.random.choice(len(self), sample_size)
return [self[i] for i in idx]
def push(self, item):
# Append to right. Oldest element will be popped from left.
self.append(item)
class MPQItemContainer(object):
"""Class for holding an item with its score.
Defines a comparison function for use in the heap-queue.
"""
def __init__(self, score, item, extra_data):
self.item = item
self.score = score
self.extra_data = extra_data
def __cmp__(self, other):
assert isinstance(other, type(self))
return cmp(self.score, other.score)
def __iter__(self):
"""Allows unpacking like a tuple."""
yield self.score
yield self.item
yield self.extra_data
def __repr__(self):
"""String representation of this item.
`extra_data` is not included in the representation. We are assuming that
`extra_data` is not easily interpreted by a human (if it was, it should be
hashable, like a string or tuple).
Returns:
String representation of `self`.
"""
return str((self.score, self.item))
def __str__(self):
return repr(self)
class MaxUniquePriorityQueue(object):
"""A maximum priority queue where duplicates are not added.
The top items by score remain in the queue. When the capacity is reached,
the lowest scored item in the queue will be dropped.
This implementation differs from a typical priority queue, in that the minimum
score is popped, instead of the maximum. Largest scores remain stuck in the
queue. This is useful for accumulating the best known items from a population.
The items used to determine uniqueness must be hashable, but additional
non-hashable data may be stored with each item.
"""
def __init__(self, capacity):
self.capacity = capacity
self.heap = []
self.unique_items = set()
def push(self, score, item, extra_data=None):
"""Push an item onto the queue.
If the queue is at capacity, the item with the smallest score will be
dropped. Note that it is assumed each item has exactly one score: pushing the
same item again with a different score is still ignored.
Args:
score: Number used to prioritize items in the queue. Largest scores are
kept in the queue.
item: A hashable item to be stored. Duplicates of this item will not be
added to the queue.
extra_data: Extra (possibly not hashable) data to store with the item.
"""
if item in self.unique_items:
return
if len(self.heap) >= self.capacity:
_, popped_item, _ = heapq.heappushpop(
self.heap, MPQItemContainer(score, item, extra_data))
self.unique_items.add(item)
self.unique_items.remove(popped_item)
else:
heapq.heappush(self.heap, MPQItemContainer(score, item, extra_data))
self.unique_items.add(item)
def pop(self):
"""Pop the item with the lowest score.
Returns:
score: Item's score.
item: The item that was popped.
extra_data: Any extra data stored with the item.
"""
if not self.heap:
return ()
score, item, extra_data = heapq.heappop(self.heap)
self.unique_items.remove(item)
return score, item, extra_data
def get_max(self):
"""Peek at the item with the highest score.
Returns:
Same as `pop`.
"""
if not self.heap:
return ()
score, item, extra_data = heapq.nlargest(1, self.heap)[0]
return score, item, extra_data
def get_min(self):
"""Peek at the item with the lowest score.
Returns:
Same as `pop`.
"""
if not self.heap:
return ()
score, item, extra_data = heapq.nsmallest(1, self.heap)[0]
return score, item, extra_data
def random_sample(self, sample_size):
"""Randomly select items from the queue.
This does not modify the queue.
Items are drawn from a uniform distribution, and not weighted by score.
Args:
sample_size: Number of random samples to draw. The same item can be
sampled multiple times.
Returns:
List of sampled items (of length `sample_size`). Each element in the list
is a tuple: (item, extra_data).
"""
idx = np.random.choice(len(self.heap), sample_size)
return [(self.heap[i].item, self.heap[i].extra_data) for i in idx]
def iter_in_order(self):
"""Iterate over items in the queue from largest score to smallest.
Yields:
item: Hashable item.
extra_data: Extra data stored with the item.
"""
for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap):
yield item, extra_data
def __len__(self):
return len(self.heap)
def __iter__(self):
for _, item, _ in self.heap:
yield item
def __repr__(self):
return '[' + ', '.join(repr(c) for c in self.heap) + ']'
def __str__(self):
return repr(self)
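# Example usage of MaxUniquePriorityQueue (illustrative, not part of the
# original module):
#
#   q = MaxUniquePriorityQueue(capacity=2)
#   q.push(1.0, 'a')
#   q.push(2.0, 'b')
#   q.push(0.5, 'c')  # At capacity; 'c' has the lowest score and is discarded.
#   # q.get_max() == (2.0, 'b', None); q.pop() == (1.0, 'a', None)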
class RouletteWheel(object):
"""Randomly samples stored objects proportionally to their given weights.
Stores objects and weights. Acts like a roulette wheel where each object is
given a slice of the roulette disk proportional to its weight.
This can be used as a replay buffer where past experiences are sampled
proportionally to their weights. A good choice of "weight" for reinforcement
learning is exp(reward / temperature) where temperature -> inf makes the
distribution more uniform and temperature -> 0 makes the distribution more
peaky.
To prevent experiences from being overweighted by appearing in the replay
buffer multiple times, a "unique mode" is supported where duplicate
experiences are ignored. In unique mode, weights can be quickly retrieved from
keys.
"""
def __init__(self, unique_mode=False, save_file=None):
"""Construct empty RouletteWheel.
If `save_file` is not None, and the file already exists on disk, whatever
is in the file will be loaded into this instance. This allows jobs using
RouletteWheel to resume after preemption.
Args:
unique_mode: If True, puts this RouletteWheel into unique mode, where
objects are added with hashable keys, so that duplicates are ignored.
save_file: Optional file path to save to. Must be a string containing
an absolute path to a file, or None. File will be Python pickle
format.
"""
self.unique_mode = unique_mode
self.objects = []
self.weights = []
self.partial_sums = []
if self.unique_mode:
self.keys_to_weights = {}
self.save_file = save_file
self.save_to_disk_buffer = []
if save_file is not None and tf.gfile.Exists(save_file):
# Load from disk.
with tf.gfile.OpenFast(save_file, 'r') as f:
count = 0
while 1:
try:
obj, weight, key = cPickle.load(f)
except EOFError:
break
else:
self.add(obj, weight, key)
count += 1
logging.info('Loaded %d samples from disk.', count)
# Clear buffer since these items are already on disk.
self.save_to_disk_buffer = []
def __iter__(self):
return iter(zip(self.objects, self.weights))
def __len__(self):
return len(self.objects)
def is_empty(self):
"""Returns whether there is anything in the roulette wheel."""
return not self.partial_sums
@property
def total_weight(self):
"""Total cumulative weight across all objects."""
if self.partial_sums:
return self.partial_sums[-1]
return 0.0
def has_key(self, key):
if not self.unique_mode:
  raise RuntimeError('has_key method can only be called in unique mode.')
return key in self.keys_to_weights
def get_weight(self, key):
if not self.unique_mode:
  raise RuntimeError('get_weight method can only be called in unique mode.')
return self.keys_to_weights[key]
def add(self, obj, weight, key=None):
"""Add one object and its weight to the roulette wheel.
Args:
obj: Any object to be stored.
weight: A non-negative float. The given object will be drawn with
probability proportional to this weight when sampling.
key: This argument is only used when in unique mode. To allow `obj` to
be an unhashable type, like list, a separate hashable key is given.
Each `key` should be unique to each `obj`. `key` is used to check if
`obj` has been added to the roulette wheel before.
Returns:
True if the object was added, False if it was not added due to it being
a duplicate (this only happens in unique mode).
Raises:
ValueError: If `weight` is negative.
ValueError: If `key` is not given when in unique mode, or if `key` is
given when not in unique mode.
"""
if weight < 0:
raise ValueError('Weight must be non-negative')
if self.unique_mode:
if key is None:
raise ValueError(
'Hashable key required for objects when unique mode is enabled.')
if key in self.keys_to_weights:
# Weight updates are not allowed. Ignore the given value of `weight`.
return False
self.keys_to_weights[key] = weight
elif key is not None:
raise ValueError(
'key argument should not be used when unique mode is disabled.')
self.objects.append(obj)
self.weights.append(weight)
self.partial_sums.append(self.total_weight + weight)
if self.save_file is not None:
# Record new item in buffer.
self.save_to_disk_buffer.append((obj, weight, key))
return True
def add_many(self, objs, weights, keys=None):
"""Add many object and their weights to the roulette wheel.
Arguments are the same as the `add` method, except each is a list. Lists
must all be the same length.
Args:
objs: List of objects to be stored.
weights: List of non-negative floats. See `add` method.
keys: List of hashable keys. This argument is only used when in unique
mode. See `add` method.
Returns:
Number of objects added. This number will be less than the number of
objects provided if we are in unique mode and some keys are already
in the roulette wheel.
Raises:
ValueError: If `keys` argument is provided when unique_mode == False, or
is not provided when unique_mode == True.
ValueError: If any of the lists are not the same length.
ValueError: If any of the weights are negative.
"""
if keys is not None and not self.unique_mode:
raise ValueError('Not in unique mode. Do not provide keys.')
elif keys is None and self.unique_mode:
raise ValueError('In unique mode. You must provide hashable keys.')
if keys and len(objs) != len(keys):
raise ValueError('Number of objects does not equal number of keys.')
if len(objs) != len(weights):
raise ValueError('Number of objects does not equal number of weights.')
return sum([self.add(obj, weights[i], key=keys[i] if keys else None)
for i, obj in enumerate(objs)])
def sample(self):
"""Spin the roulette wheel.
Randomly select an object with probability proportional to its weight.
Returns:
object: The selected object.
weight: The weight of the selected object.
Raises:
RuntimeError: If the roulette wheel is empty.
"""
if self.is_empty():
raise RuntimeError('Trying to sample from empty roulette wheel.')
spin = random.random() * self.total_weight
# Binary search.
i = bisect.bisect_right(self.partial_sums, spin)
if i == len(self.partial_sums):
# This should not happen since random.random() will always be strictly
# less than 1.0, and the last partial sum equals self.total_weight.
# However it may happen due to rounding error, in which case we simply
# select the last object.
i -= 1
return self.objects[i], self.weights[i]
def sample_many(self, count):
"""Spin the roulette wheel `count` times and return the results."""
if self.is_empty():
raise RuntimeError('Trying to sample from empty roulette wheel.')
return [self.sample() for _ in xrange(count)]
def incremental_save(self, log_info=False):
"""Write new entries to disk.
This performs an append operation on the `save_file` given in the
constructor. Any entries added since the last call to `incremental_save`
will be appended to the file.
If a new RouletteWheel is constructed with the same `save_file`, all the
entries written there will be automatically loaded into the instance.
This is useful when a job resumes after preemption.
Args:
log_info: If True, info about this operation will be logged.
Raises:
RuntimeError: If `save_file` given in the constructor is None.
"""
if self.save_file is None:
raise RuntimeError('Cannot call incremental_save. `save_file` is None.')
if log_info:
logging.info('Saving %d new samples to disk.',
len(self.save_to_disk_buffer))
with tf.gfile.OpenFast(self.save_file, 'a') as f:
for entry in self.save_to_disk_buffer:
cPickle.dump(entry, f)
# Clear the buffer.
self.save_to_disk_buffer = []
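# Usage sketch for RouletteWheel (illustrative, not part of the original
# module). In unique mode, weights might be exp(reward / temperature) computed
# by the caller; literal weights are used here:
#
#   wheel = RouletteWheel(unique_mode=True)
#   wheel.add([1, 2, 3], 1.5, key='episode-0')
#   wheel.add([4, 5], 0.5, key='episode-1')
#   obj, weight = wheel.sample()  # Drawn proportionally to weight.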
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Tests for common.utils.
"""
from collections import Counter
import random
import tempfile
import numpy as np
import tensorflow as tf
from common import utils # brain coder
class UtilsTest(tf.test.TestCase):
def testStackPad(self):
# 1D.
tensors = [[1, 2, 3], [4, 5, 6, 7, 8], [9]]
result = utils.stack_pad(tensors, pad_axes=0, pad_to_lengths=6)
self.assertTrue(np.array_equal(
result,
np.asarray([[1, 2, 3, 0, 0, 0],
[4, 5, 6, 7, 8, 0],
[9, 0, 0, 0, 0, 0]], dtype=np.float32)))
# 3D.
tensors = [[[[1, 2, 3], [4, 5, 6]]],
[[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]],
[[[0, 1, 2]], [[3, 4, 5]]]]
result = utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=[2, 2])
self.assertTrue(np.array_equal(
result,
np.asarray([[[[1, 2, 3], [4, 5, 6]],
[[0, 0, 0], [0, 0, 0]]],
[[[7, 8, 9], [0, 1, 2]],
[[3, 4, 5], [6, 7, 8]]],
[[[0, 1, 2], [0, 0, 0]],
[[3, 4, 5], [0, 0, 0]]]], dtype=np.float32)))
def testStackPadNoAxes(self):
# 2D.
tensors = [[[1, 2, 3], [4, 5, 6]],
[[7, 8, 9], [1, 2, 3]],
[[4, 5, 6], [7, 8, 9]]]
result = utils.stack_pad(tensors)
self.assertTrue(np.array_equal(
result,
np.asarray(tensors)))
def testStackPadNoneLength(self):
# 1D.
tensors = [[1, 2, 3], [4, 5, 6, 7, 8], [9]]
result = utils.stack_pad(tensors, pad_axes=0, pad_to_lengths=None)
self.assertTrue(np.array_equal(
result,
np.asarray([[1, 2, 3, 0, 0],
[4, 5, 6, 7, 8],
[9, 0, 0, 0, 0]], dtype=np.float32)))
# 3D.
tensors = [[[[1, 2, 3], [4, 5, 6]]],
[[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]],
[[[0, 1, 2]], [[3, 4, 5]]]]
result = utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=None)
self.assertTrue(np.array_equal(
result,
np.asarray([[[[1, 2, 3], [4, 5, 6]],
[[0, 0, 0], [0, 0, 0]]],
[[[7, 8, 9], [0, 1, 2]],
[[3, 4, 5], [6, 7, 8]]],
[[[0, 1, 2], [0, 0, 0]],
[[3, 4, 5], [0, 0, 0]]]], dtype=np.float32)))
# 3D with partial pad_to_lengths.
tensors = [[[[1, 2, 3], [4, 5, 6]]],
[[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]],
[[[0, 1, 2]], [[3, 4, 5]]]]
result = utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=[None, 3])
self.assertTrue(np.array_equal(
result,
np.asarray([[[[1, 2, 3], [4, 5, 6], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0]]],
[[[7, 8, 9], [0, 1, 2], [0, 0, 0]],
[[3, 4, 5], [6, 7, 8], [0, 0, 0]]],
[[[0, 1, 2], [0, 0, 0], [0, 0, 0]],
[[3, 4, 5], [0, 0, 0], [0, 0, 0]]]], dtype=np.float32)))
def testStackPadValueError(self):
# 3D.
tensors = [[[[1, 2, 3], [4, 5, 6]]],
[[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]],
[[[0, 1, 2]], [[3, 4, 5]]],
[[[1, 2, 3, 4]]]]
# Not all tensors have the same shape along axis 2.
with self.assertRaises(ValueError):
utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=[2, 2])
def testRecord(self):
my_record = utils.make_record('my_record', ['a', 'b', 'c'], {'b': 55})
inst = my_record(a=1, b=2, c=3)
self.assertEqual(1, inst.a)
self.assertEqual(2, inst.b)
self.assertEqual(3, inst.c)
self.assertEqual(1, inst[0])
self.assertEqual(2, inst[1])
self.assertEqual(3, inst[2])
self.assertEqual([1, 2, 3], list(iter(inst)))
self.assertEqual(3, len(inst))
inst.b = 999
self.assertEqual(999, inst.b)
self.assertEqual(999, inst[1])
inst2 = my_record(1, 999, 3)
self.assertTrue(inst == inst2)
inst2[1] = 3
self.assertFalse(inst == inst2)
inst3 = my_record(a=1, c=3)
inst.b = 55
self.assertEqual(inst, inst3)
def testRecordUnique(self):
record1 = utils.make_record('record1', ['a', 'b', 'c'])
record2 = utils.make_record('record2', ['a', 'b', 'c'])
self.assertNotEqual(record1(1, 2, 3), record2(1, 2, 3))
self.assertEqual(record1(1, 2, 3), record1(1, 2, 3))
def testTupleToRecord(self):
my_record = utils.make_record('my_record', ['a', 'b', 'c'])
inst = utils.tuple_to_record((5, 6, 7), my_record)
self.assertEqual(my_record(5, 6, 7), inst)
def testRecordErrors(self):
my_record = utils.make_record('my_record', ['a', 'b', 'c'], {'b': 10})
with self.assertRaises(ValueError):
my_record(c=5) # Did not provide required argument 'a'.
with self.assertRaises(ValueError):
my_record(1, 2, 3, 4) # Too many arguments.
def testRandomQueue(self):
np.random.seed(567890)
queue = utils.RandomQueue(5)
queue.push(5)
queue.push(6)
queue.push(7)
queue.push(8)
queue.push(9)
queue.push(10)
self.assertTrue(5 not in queue)
sample = queue.random_sample(1000)
self.assertEqual(1000, len(sample))
self.assertEqual([6, 7, 8, 9, 10], sorted(np.unique(sample).tolist()))
def testMaxUniquePriorityQueue(self):
queue = utils.MaxUniquePriorityQueue(5)
queue.push(1.0, 'string 1')
queue.push(-0.5, 'string 2')
queue.push(0.5, 'string 3')
self.assertEqual((-0.5, 'string 2', None), queue.pop())
queue.push(0.1, 'string 4')
queue.push(1.5, 'string 5')
queue.push(0.0, 'string 6')
queue.push(0.2, 'string 7')
self.assertEqual((1.5, 'string 5', None), queue.get_max())
self.assertEqual((0.1, 'string 4', None), queue.get_min())
self.assertEqual(
[('string 5', None), ('string 1', None), ('string 3', None),
('string 7', None), ('string 4', None)],
list(queue.iter_in_order()))
def testMaxUniquePriorityQueue_Duplicates(self):
queue = utils.MaxUniquePriorityQueue(5)
queue.push(0.0, 'string 1')
queue.push(0.0, 'string 2')
queue.push(0.0, 'string 3')
self.assertEqual((0.0, 'string 1', None), queue.pop())
self.assertEqual((0.0, 'string 2', None), queue.pop())
self.assertEqual((0.0, 'string 3', None), queue.pop())
self.assertEqual(0, len(queue))
queue.push(0.1, 'string 4')
queue.push(1.5, 'string 5')
queue.push(0.3, 'string 6')
queue.push(0.2, 'string 7')
queue.push(0.0, 'string 8')
queue.push(1.5, 'string 5')
queue.push(1.5, 'string 5')
self.assertEqual((1.5, 'string 5', None), queue.get_max())
self.assertEqual((0.0, 'string 8', None), queue.get_min())
self.assertEqual(
[('string 5', None), ('string 6', None), ('string 7', None),
('string 4', None), ('string 8', None)],
list(queue.iter_in_order()))
def testMaxUniquePriorityQueue_ExtraData(self):
queue = utils.MaxUniquePriorityQueue(5)
queue.push(1.0, 'string 1', [1, 2, 3])
queue.push(0.5, 'string 2', [4, 5, 6])
queue.push(0.5, 'string 3', [7, 8, 9])
queue.push(0.5, 'string 2', [10, 11, 12])
self.assertEqual((0.5, 'string 2', [4, 5, 6]), queue.pop())
self.assertEqual((0.5, 'string 3', [7, 8, 9]), queue.pop())
self.assertEqual((1.0, 'string 1', [1, 2, 3]), queue.pop())
self.assertEqual(0, len(queue))
queue.push(0.5, 'string 2', [10, 11, 12])
self.assertEqual((0.5, 'string 2', [10, 11, 12]), queue.pop())
def testRouletteWheel(self):
random.seed(12345678987654321)
r = utils.RouletteWheel()
self.assertTrue(r.is_empty())
with self.assertRaises(RuntimeError):
r.sample() # Cannot sample when empty.
self.assertEqual(0, r.total_weight)
self.assertEqual(True, r.add('a', 0.1))
self.assertFalse(r.is_empty())
self.assertEqual(0.1, r.total_weight)
self.assertEqual(True, r.add('b', 0.01))
self.assertEqual(0.11, r.total_weight)
self.assertEqual(True, r.add('c', 0.5))
self.assertEqual(True, r.add('d', 0.1))
self.assertEqual(True, r.add('e', 0.05))
self.assertEqual(True, r.add('f', 0.03))
self.assertEqual(True, r.add('g', 0.001))
self.assertEqual(0.791, r.total_weight)
self.assertFalse(r.is_empty())
# Check that sampling is correct.
obj, weight = r.sample()
self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight))
self.assertTrue((obj, weight) in r)
for obj, weight in r.sample_many(100):
self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight))
self.assertTrue((obj, weight) in r)
# Check that sampling distribution is correct.
n = 1000000
c = Counter(r.sample_many(n))
for obj, w in r:
estimated_w = c[(obj, w)] / float(n) * r.total_weight
self.assertTrue(
np.isclose(w, estimated_w, atol=1e-3),
'Expected %s, got %s, for object %s' % (w, estimated_w, obj))
def testRouletteWheel_AddMany(self):
random.seed(12345678987654321)
r = utils.RouletteWheel()
self.assertTrue(r.is_empty())
with self.assertRaises(RuntimeError):
r.sample() # Cannot sample when empty.
self.assertEqual(0, r.total_weight)
count = r.add_many(
['a', 'b', 'c', 'd', 'e', 'f', 'g'],
[0.1, 0.01, 0.5, 0.1, 0.05, 0.03, 0.001])
self.assertEqual(7, count)
self.assertFalse(r.is_empty())
self.assertEqual(0.791, r.total_weight)
# Adding no items is allowed.
count = r.add_many([], [])
self.assertEqual(0, count)
self.assertFalse(r.is_empty())
self.assertEqual(0.791, r.total_weight)
# Check that sampling is correct.
obj, weight = r.sample()
self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight))
self.assertTrue((obj, weight) in r)
for obj, weight in r.sample_many(100):
self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight))
self.assertTrue((obj, weight) in r)
# Check that sampling distribution is correct.
n = 1000000
c = Counter(r.sample_many(n))
for obj, w in r:
estimated_w = c[(obj, w)] / float(n) * r.total_weight
self.assertTrue(
np.isclose(w, estimated_w, atol=1e-3),
'Expected %s, got %s, for object %s' % (w, estimated_w, obj))
def testRouletteWheel_AddZeroWeights(self):
r = utils.RouletteWheel()
self.assertEqual(True, r.add('a', 0))
self.assertFalse(r.is_empty())
self.assertEqual(4, r.add_many(['b', 'c', 'd', 'e'], [0, 0.1, 0, 0]))
self.assertEqual(
[('a', 0.0), ('b', 0.0), ('c', 0.1), ('d', 0.0), ('e', 0.0)],
list(r))
def testRouletteWheel_UniqueMode(self):
random.seed(12345678987654321)
r = utils.RouletteWheel(unique_mode=True)
self.assertEqual(True, r.add([1, 2, 3], 1, 'a'))
self.assertEqual(True, r.add([4, 5], 0.5, 'b'))
self.assertEqual(False, r.add([1, 2, 3], 1.5, 'a'))
self.assertEqual(
[([1, 2, 3], 1.0), ([4, 5], 0.5)],
list(r))
self.assertEqual(1.5, r.total_weight)
self.assertEqual(
2,
r.add_many(
[[5, 6, 2, 3], [1, 2, 3], [8], [1, 2, 3]],
[0.1, 0.2, 0.1, 2.0],
['c', 'a', 'd', 'a']))
self.assertEqual(
[([1, 2, 3], 1.0), ([4, 5], 0.5), ([5, 6, 2, 3], 0.1), ([8], 0.1)],
list(r))
self.assertTrue(np.isclose(1.7, r.total_weight))
self.assertEqual(0, r.add_many([], [], [])) # Adding no items is allowed.
with self.assertRaises(ValueError):
# Key not given.
r.add([7, 8, 9], 2.0)
with self.assertRaises(ValueError):
# Keys not given.
r.add_many([[7, 8, 9], [10]], [2.0, 2.0])
self.assertEqual(True, r.has_key('a'))
self.assertEqual(True, r.has_key('b'))
self.assertEqual(False, r.has_key('z'))
self.assertEqual(1.0, r.get_weight('a'))
self.assertEqual(0.5, r.get_weight('b'))
r = utils.RouletteWheel(unique_mode=False)
self.assertEqual(True, r.add([1, 2, 3], 1))
self.assertEqual(True, r.add([4, 5], 0.5))
self.assertEqual(True, r.add([1, 2, 3], 1.5))
self.assertEqual(
[([1, 2, 3], 1.0), ([4, 5], 0.5), ([1, 2, 3], 1.5)],
list(r))
self.assertEqual(3, r.total_weight)
self.assertEqual(
4,
r.add_many(
[[5, 6, 2, 3], [1, 2, 3], [8], [1, 2, 3]],
[0.1, 0.2, 0.1, 0.2]))
self.assertEqual(
[([1, 2, 3], 1.0), ([4, 5], 0.5), ([1, 2, 3], 1.5),
([5, 6, 2, 3], 0.1), ([1, 2, 3], 0.2), ([8], 0.1), ([1, 2, 3], 0.2)],
list(r))
self.assertTrue(np.isclose(3.6, r.total_weight))
with self.assertRaises(ValueError):
# Key is given.
r.add([7, 8, 9], 2.0, 'a')
with self.assertRaises(ValueError):
# Keys are given.
r.add_many([[7, 8, 9], [10]], [2.0, 2.0], ['a', 'b'])
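  # incremental_save appends entries added since the last save to save_file;
  # constructing a new RouletteWheel with the same save_file restores them, as
  # checked below by rebuilding r2 after each pair of additions.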
def testRouletteWheel_IncrementalSave(self):
f = tempfile.NamedTemporaryFile()
r = utils.RouletteWheel(unique_mode=True, save_file=f.name)
entries = [
([1, 2, 3], 0.1, 'a'),
([4, 5], 0.2, 'b'),
([6], 0.3, 'c'),
([7, 8, 9, 10], 0.25, 'd'),
([-1, -2], 0.15, 'e'),
([-3, -4, -5], 0.5, 'f')]
self.assertTrue(r.is_empty())
for i in range(0, len(entries), 2):
r.add(*entries[i])
r.add(*entries[i + 1])
r.incremental_save()
r2 = utils.RouletteWheel(unique_mode=True, save_file=f.name)
self.assertEqual(i + 2, len(r2))
count = 0
for j, (obj, weight) in enumerate(r2):
self.assertEqual(entries[j][0], obj)
self.assertEqual(entries[j][1], weight)
self.assertEqual(weight, r2.get_weight(entries[j][2]))
count += 1
self.assertEqual(i + 2, count)
if __name__ == '__main__':
tf.test.main()
licenses(["notice"])
package(default_visibility = [
"//learning/brain/research/neural_coder:__subpackages__",
])
load("@subpar//:subpar.bzl", "par_binary")
par_binary(
name = "run",
srcs = ["run.py"],
deps = [
":defaults",
":ga_train",
":pg_train",
# absl dep :app
# absl dep /flags
# absl dep /logging
],
)
par_binary(
name = "tune",
srcs = ["tune.py"],
deps = [
":defaults",
":run",
# file dep
# absl dep :app
# absl dep /flags
# absl dep /logging
# numpy dep
# tensorflow dep
],
)
py_library(
name = "ga_train",
srcs = ["ga_train.py"],
deps = [
":data",
":defaults",
":ga_lib",
":results_lib",
# file dep
# absl dep /flags
# absl dep /logging
# numpy dep
# tensorflow dep
"//common:utils", # project
],
)
py_library(
name = "ga_lib",
srcs = ["ga_lib.py"],
deps = [
":misc",
# absl dep /flags
# absl dep /logging
# numpy dep
"//common:bf", # project
"//common:utils", # project
],
)
py_test(
name = "ga_train_test",
srcs = ["ga_train_test.py"],
deps = [
":defaults",
":run",
# absl dep /flags
# tensorflow dep
],
)
py_library(
name = "pg_train",
srcs = ["pg_train.py"],
deps = [
":data",
":defaults",
":pg_agent",
":results_lib",
# file dep
# absl dep /flags
# absl dep /logging
# tensorflow dep
# tensorflow internal dep # build_cleaner: keep
],
)
py_library(
name = "pg_agent",
srcs = ["pg_agent.py"],
deps = [
":misc",
# file dep
# absl dep /logging
# numpy dep
# tensorflow dep
"//common:rollout", # project
"//common:utils", # project
],
)
py_test(
name = "pg_agent_test",
srcs = ["pg_agent_test.py"],
deps = [
":data",
":defaults",
":misc",
":pg_agent",
":pg_train",
# absl dep /logging
# numpy dep
# tensorflow dep
"//common:utils", # project
],
)
py_library(
name = "defaults",
srcs = ["defaults.py"],
deps = [
# absl dep /logging
"//common:config_lib", # project
],
)
py_library(
name = "misc",
srcs = ["misc.py"],
)
py_library(
name = "data",
srcs = ["data.py"],
deps = [
":code_tasks",
# absl dep /logging
],
)
py_library(
name = "code_tasks",
srcs = ["code_tasks.py"],
deps = [
":misc",
":test_tasks",
# absl dep /logging
# numpy dep
"//common:bf", # project
"//common:reward", # project
],
)
py_test(
name = "code_tasks_test",
srcs = ["code_tasks_test.py"],
deps = [
":code_tasks",
":defaults",
# numpy dep
# tensorflow dep
],
)
py_library(
name = "test_tasks",
srcs = ["test_tasks.py"],
deps = [
":misc",
"//common:reward", # project
],
)
py_test(
name = "test_tasks_test",
srcs = ["test_tasks_test.py"],
deps = [
":misc",
":test_tasks",
# numpy dep
# tensorflow dep
],
)
py_test(
name = "pg_train_test",
size = "large",
srcs = ["pg_train_test.py"],
deps = [
":defaults",
":run",
# absl dep /logging
# tensorflow dep
],
)
py_library(
name = "results_lib",
srcs = ["results_lib.py"],
deps = [
# file dep
# tensorflow dep
],
)
py_test(
name = "results_lib_test",
srcs = ["results_lib_test.py"],
deps = [
":results_lib",
# tensorflow dep
],
)
par_binary(
name = "aggregate_experiment_results",
srcs = ["aggregate_experiment_results.py"],
deps = [
":misc",
":results_lib",
# file dep
# absl dep :app
# absl dep /flags
# numpy dep
# tensorflow dep
],
)
par_binary(
name = "aggregate_tuning_results",
srcs = ["aggregate_tuning_results.py"],
deps = [
# file dep
# absl dep :app
# absl dep /flags
# tensorflow dep
],
)
# Experiments for ICLR 2018 paper
[Code Synthesis with Priority Queue Training](https://openreview.net/forum?id=r1AoGNlC-).
Runs policy gradient (REINFORCE), priority queue training, a genetic algorithm,
and uniform random search.
Run all examples below out of your top-level repo directory, i.e. where your git
clone resides.
## Just tell me how to run something and see results
```bash
# These tasks are the fastest to learn. 'echo' and 'count-down' are very
# easy. run_eval_tasks.py will do most of the work to run all the jobs.
# Should take between 10 and 30 minutes.
# How many repetitions each experiment will run. In the paper, we use 25. Fewer
# reps mean faster experiments, but noisier results.
REPS=25
# Extra description in the job names for these experiments. Use this description
# to distinguish between multiple runs of the same experiment.
DESC="demo"
# The tasks to run.
TASKS="reverse echo-second-seq"
# The model types and max NPE.
EXPS=( pg-20M topk-20M ga-20M rand-20M )
# Where training data is saved. This is chosen by launch_training.sh. Custom
# implementations of launch_training.sh may use different locations.
MODELS_DIR="/tmp/models"
# Run run_eval_tasks.py for each experiment name in EXPS.
for exp in "${EXPS[@]}"
do
./single_task/run_eval_tasks.py \
--exp "$exp" --tasks $TASKS --desc "$DESC" --reps $REPS
done
# During training or after completion, run this to aggregate results into a
# table. This is also useful for seeing how much progress has been made.
# Make sure the arguments here match the settings used above.
# Note: This can take a few minutes because it reads from every experiment
# directory.
bazel run single_task:aggregate_experiment_results -- \
--models_dir="$MODELS_DIR" \
--max_npe="20M" \
--task_list="$TASKS" \
--model_types="[('pg', '$DESC'), ('topk', '$DESC'), ('ga', '$DESC'),
('rand', '$DESC')]" \
--csv_file="/tmp/results_table.csv"
```
## Reproduce tuning results in paper
```bash
bazel build -c opt single_task:tune.par
# PG and TopK Tuning.
MAX_NPE=5000000
CONFIG="
env=c(task_cycle=['reverse-tune','remove-tune']),
agent=c(
algorithm='pg',
grad_clip_threshold=50.0,param_init_factor=0.5,entropy_beta=0.05,lr=1e-5,
optimizer='rmsprop',ema_baseline_decay=0.99,topk_loss_hparam=0.0,topk=0,
replay_temperature=1.0,alpha=0.0,eos_token=False),
timestep_limit=50,batch_size=64"
./single_task/launch_tuning.sh \
--job_name="iclr_pg_gridsearch.reverse-remove" \
--config="$CONFIG" \
--max_npe="$MAX_NPE" \
--num_workers_per_tuner=1 \
--num_ps_per_tuner=0 \
--num_tuners=1 \
--num_repetitions=50 \
--hparam_space_type="pg" \
--stop_on_success=true
./single_task/launch_tuning.sh \
--job_name="iclr_pg_topk_gridsearch.reverse-remove" \
--config="$CONFIG" \
--max_npe="$MAX_NPE" \
--num_workers_per_tuner=1 \
--num_ps_per_tuner=0 \
--num_tuners=1 \
--num_repetitions=50 \
--hparam_space_type="pg-topk" \
--fixed_hparams="topk=10" \
--stop_on_success=true
./single_task/launch_tuning.sh \
--job_name="iclr_topk_gridsearch.reverse-remove" \
--config="$CONFIG" \
--max_npe="$MAX_NPE" \
--num_workers_per_tuner=1 \
--num_ps_per_tuner=0 \
--num_tuners=1 \
--num_repetitions=50 \
--hparam_space_type="topk" \
--fixed_hparams="topk=10" \
--stop_on_success=true
# GA Tuning.
CONFIG="
env=c(task_cycle=['reverse-tune','remove-char-tune']),
agent=c(algorithm='ga'),
timestep_limit=50"
./single_task/launch_tuning.sh \
--job_name="iclr_ga_gridsearch.reverse-remove" \
--config="$CONFIG" \
--max_npe="$MAX_NPE" \
--num_workers_per_tuner=25 \
--num_ps_per_tuner=0 \
--num_tuners=1 \
--num_repetitions=50 \
--hparam_space_type="ga" \
--stop_on_success=true
# Aggregate tuning results. Run after tuning jobs complete. Set MODELS_DIR to
# wherever your launch_tuning.sh implementation writes results.
bazel run -c opt single_task:aggregate_tuning_results -- \
--tuning_dir="$MODELS_DIR/iclr_pg_gridsearch.reverse-remove"
bazel run -c opt single_task:aggregate_tuning_results -- \
--tuning_dir="$MODELS_DIR/iclr_pg_topk_gridsearch.reverse-remove"
bazel run -c opt single_task:aggregate_tuning_results -- \
--tuning_dir="$MODELS_DIR/iclr_topk_gridsearch.reverse-remove"
bazel run -c opt single_task:aggregate_tuning_results -- \
--tuning_dir="$MODELS_DIR/iclr_ga_gridsearch.reverse-remove"
```
## Reproduce eval results in paper
```bash
DESC="v0" # Description for each experiment. "Version 0" is a good default.
EXPS=( pg-5M topk-5M ga-5M rand-5M pg-20M topk-20M ga-20M rand-20M )
for exp in "${EXPS[@]}"
do
./single_task/run_eval_tasks.py \
--exp "$exp" --iclr_tasks --desc "$DESC"
done
```
## Run single experiment
```bash
EXP="topk-20M" # Learning algorithm + max-NPE
TASK="reverse" # Coding task
DESC="v0" # Description for each experiment. "Version 0" is a good default.
./single_task/run_eval_tasks.py \
--exp "$EXP" --task "$TASK" --desc "$DESC"
```
## Fetch eval results into a table
```bash
# These arguments should match the settings you used to run the experiments.
MODELS_DIR="/tmp/models"
MAX_NPE="20M"
DESC="v0" # Same description used in the experiments.
# MODEL_TYPES specifies each model type and the description used in their
# experiments.
MODEL_TYPES="[('pg', '$DESC'), ('topk', '$DESC'),
('ga', '$DESC'), ('rand', '$DESC')]"
TASKS="" # Empty string will default to all ICLR tasks.
# To specify custom task list, give task names separated by spaces. Example:
# TASKS="reverse remove-char"
bazel run single_task:aggregate_experiment_results -- \
--models_dir="$MODELS_DIR" \
--max_npe="$MAX_NPE" \
--task_list="$TASKS" \
--model_types="$MODEL_TYPES" \
--csv_file="/tmp/results_table.csv"
```
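To post-process the table programmatically rather than in a spreadsheet, the CSV
written above can be read back with the standard library. This is a minimal
sketch, assuming the `/tmp/results_table.csv` path used above; the row layout
follows how `aggregate_experiment_results.py` builds the table:
```python
import csv

with open('/tmp/results_table.csv') as f:
    rows = list(csv.reader(f))

# rows[0]: max-NPE plus one header per model type; rows[1]: per-model column
# names (reps, success rate, avg NPE); rows[2:]: one row per task.
print(rows[0])
for row in rows[2:]:
    print(row[0], row[1:])
```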
## Reproduce shortest code examples in paper
```bash
# Maximum NPE is higher here. We only do 1 repetition, and the algorithm needs
# time to simplify its solution.
MODELS_DIR="/tmp/models"
NPE="500M"
DESC="short-code"
./single_task/run_eval_tasks.py \
--exp "simpl-$NPE" --desc "$DESC" --iclr_tasks --reps 1
# Aggregate best code strings. Run after training completes.
TASKS="" # Empty string. Will default to all ICLR tasks.
bazel run single_task:aggregate_experiment_results -- \
--models_dir="$MODELS_DIR" \
--max_npe="$NPE" \
--task_list="$TASKS" \
--model_types="[('topk', '$DESC')]" \
--data=code
```
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
r"""This script crawls experiment directories for results and aggregates them.
Usage example:
MODELS_DIR="/tmp/models"
bazel run single_task:aggregate_experiment_results -- \
--models_dir="$MODELS_DIR" \
--max_npe="20M" \
--task_list="add echo" \
--model_types="[('topk', 'v0'), ('ga', 'v0')]" \
--csv_file=/tmp/results_table.csv
"""
import ast
from collections import namedtuple
import csv
import os
import re
import StringIO
import sys
from absl import app
from absl import flags
import numpy as np
import tensorflow as tf
from single_task import misc # brain coder
from single_task import results_lib # brain coder
DEFAULT_MODELS = [('pg', 'v0'), ('topk', 'v0'), ('ga', 'v0'), ('rand', 'v0')]
DEFAULT_TASKS = [
'reverse', 'remove-char', 'count-char', 'add', 'bool-logic', 'print-hello',
'echo-twice', 'echo-thrice', 'copy-reverse', 'zero-cascade', 'cascade',
'shift-left', 'shift-right', 'riffle', 'unriffle', 'middle-char',
'remove-last', 'remove-last-two', 'echo-alternating', 'echo-half', 'length',
'echo-second-seq', 'echo-nth-seq', 'substring', 'divide-2', 'dedup']
FLAGS = flags.FLAGS
flags.DEFINE_string(
'models_dir', '',
'Absolute path where results folders are found.')
flags.DEFINE_string(
'exp_prefix', 'bf_rl_iclr',
'Prefix for all experiment folders.')
flags.DEFINE_string(
'max_npe', '5M',
'String representation of max NPE of the experiments.')
flags.DEFINE_spaceseplist(
'task_list', DEFAULT_TASKS,
'List of task names separated by spaces. If empty string, defaults to '
'`DEFAULT_TASKS`. These are the rows of the results table.')
flags.DEFINE_string(
'model_types', str(DEFAULT_MODELS),
'String representation of a python list of 2-tuples, each a model_type + '
'job description pair. Descriptions allow you to choose among different '
'runs of the same experiment. These are the columns of the results table.')
flags.DEFINE_string(
'csv_file', '/tmp/results_table.csv',
'Where to write results table. Format is CSV.')
flags.DEFINE_enum(
'data', 'success_rates', ['success_rates', 'code'],
'What type of data to aggregate.')
def make_csv_string(table):
"""Convert 2D list to CSV string."""
s = StringIO.StringIO()
writer = csv.writer(s)
writer.writerows(table)
value = s.getvalue()
s.close()
return value
def process_results(metrics):
"""Extract useful information from given metrics.
Args:
metrics: List of results dicts. These should have been written to disk by
training jobs.
Returns:
Dict mapping stats names to values.
Raises:
    ValueError: If max_npe or max_global_repetitions values are inconsistent
across dicts in the `metrics` list.
"""
count = len(metrics)
success_count = 0
total_npe = 0 # Counting NPE across all runs.
success_npe = 0 # Counting NPE in successful runs only.
max_npe = 0
max_repetitions = 0
for metric_dict in metrics:
if not max_npe:
max_npe = metric_dict['max_npe']
elif max_npe != metric_dict['max_npe']:
raise ValueError(
'Invalid experiment. Different reps have different max-NPE settings.')
if not max_repetitions:
max_repetitions = metric_dict['max_global_repetitions']
elif max_repetitions != metric_dict['max_global_repetitions']:
raise ValueError(
'Invalid experiment. Different reps have different num-repetition '
'settings.')
if metric_dict['found_solution']:
success_count += 1
success_npe += metric_dict['npe']
total_npe += metric_dict['npe']
stats = {}
stats['max_npe'] = max_npe
stats['max_repetitions'] = max_repetitions
stats['repetitions'] = count
stats['successes'] = success_count # successful reps
stats['failures'] = count - success_count # failed reps
stats['success_npe'] = success_npe
stats['total_npe'] = total_npe
if success_count:
# Only successful runs counted.
stats['avg_success_npe'] = stats['success_npe'] / float(success_count)
else:
stats['avg_success_npe'] = 0.0
if count:
stats['success_rate'] = success_count / float(count)
stats['avg_total_npe'] = stats['total_npe'] / float(count)
else:
stats['success_rate'] = 0.0
stats['avg_total_npe'] = 0.0
return stats
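# Illustrative example (hypothetical metric dicts, not real training output):
#   process_results([
#       {'max_npe': 20000000, 'max_global_repetitions': 25,
#        'found_solution': True, 'npe': 1200000},
#       {'max_npe': 20000000, 'max_global_repetitions': 25,
#        'found_solution': False, 'npe': 20000000}])
#   # -> success_rate 0.5, avg_success_npe 1200000.0, avg_total_npe 10600000.0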
ProcessedResults = namedtuple('ProcessedResults', ['metrics', 'processed'])
def get_results_for_experiment(
models_dir, task_name, model_type='pg', max_npe='5M', desc='v0',
name_prefix='bf_rl_paper', extra_desc=''):
"""Get and process results for a given experiment.
An experiment is a set of runs with the same hyperparameters and environment.
It is uniquely specified by a (task_name, model_type, max_npe) triple, as
well as an optional description.
We assume that each experiment has a folder with the same name as the job that
ran the experiment. The name is computed by
"%name_prefix%.%desc%-%max_npe%_%task_name%".
Args:
models_dir: Parent directory containing experiment folders.
task_name: String name of task (the coding env). See code_tasks.py or
run_eval_tasks.py
model_type: Name of the algorithm, such as 'pg', 'topk', 'ga', 'rand'.
max_npe: String SI unit representation of the maximum NPE threshold for the
experiment. For example, "5M" means 5 million.
desc: Description.
name_prefix: Prefix of job names. Normally leave this as default.
extra_desc: Optional extra description at the end of the job name.
Returns:
ProcessedResults namedtuple instance, containing
metrics: Raw dicts read from disk.
processed: Stats computed by `process_results`.
Raises:
ValueError: If max_npe in the metrics does not match NPE in the experiment
folder name.
"""
folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(desc, model_type, max_npe,
task_name)
if extra_desc:
folder += '.' + extra_desc
results = results_lib.Results(os.path.join(models_dir, folder))
metrics, _ = results.read_all()
processed = process_results(metrics)
if (not np.isclose(processed['max_npe'], misc.si_to_int(max_npe))
and processed['repetitions']):
raise ValueError(
'Invalid experiment. Max-NPE setting does not match expected max-NPE '
'in experiment name.')
return ProcessedResults(metrics=metrics, processed=processed)
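# For example (hypothetical arguments), models_dir='/tmp/models',
# task_name='reverse', model_type='topk', max_npe='20M', desc='v0' with the
# default name_prefix reads results from
# /tmp/models/bf_rl_paper.v0.topk-20M_reverse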
BestCodeResults = namedtuple(
'BestCodeResults',
['code', 'reward', 'npe', 'folder', 'finished', 'error'])
class BestCodeResultError(object):
success = 0
no_solution_found = 1
experiment_does_not_exist = 2
def get_best_code_for_experiment(
    models_dir, task_name, model_type='pg', max_npe='5M', desc='v0',
name_prefix='bf_rl_paper', extra_desc=''):
"""Like `get_results_for_experiment`, but fetches the code solutions."""
folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(desc, model_type, max_npe,
task_name)
if extra_desc:
folder += '.' + extra_desc
log_dir = os.path.join(models_dir, folder, 'logs')
search_regex = r'^solutions_([0-9])+\.txt$'
try:
all_children = tf.gfile.ListDirectory(log_dir)
except tf.errors.NotFoundError:
return BestCodeResults(
code=None, reward=0.0, npe=0, folder=folder, finished=False,
error=BestCodeResultError.experiment_does_not_exist)
solution_files = [
fname for fname in all_children if re.search(search_regex, fname)]
max_reward = 0.0
npe = 0
best_code = None
for fname in solution_files:
with tf.gfile.FastGFile(os.path.join(log_dir, fname), 'r') as reader:
results = [ast.literal_eval(entry) for entry in reader]
for res in results:
if res['reward'] > max_reward:
best_code = res['code']
max_reward = res['reward']
npe = res['npe']
error = (
BestCodeResultError.success if best_code
else BestCodeResultError.no_solution_found)
try:
# If there is a status.txt file, check if it contains the status of the job.
with tf.gfile.FastGFile(os.path.join(log_dir, 'status.txt'), 'r') as f:
# Job is done, so mark this experiment as finished.
finished = f.read().lower().strip() == 'done'
except tf.errors.NotFoundError:
# No status file has been written, so the experiment is not done. No need to
# report an error here, because we do not require that experiment jobs write
# out a status.txt file until they have finished.
finished = False
return BestCodeResults(
code=best_code, reward=max_reward, npe=npe, folder=folder,
finished=finished, error=error)
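# Note: the returned reward and npe describe the single best-rewarded program
# found across all solutions_*.txt files in the experiment's logs directory.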
def make_results_table(
models=None,
tasks=None,
max_npe='5M',
name_prefix='bf_rl_paper',
extra_desc='',
models_dir='/tmp'):
"""Creates a table of results: algorithm + version by tasks.
Args:
models: The table columns. A list of (algorithm, desc) tuples.
tasks: The table rows. List of task names.
max_npe: String SI unit representation of the maximum NPE threshold for the
experiment. For example, "5M" means 5 million. All entries in the table
share the same max-NPE.
name_prefix: Name prefix used in logging directory for the experiment.
extra_desc: Extra description added to name of logging directory for the
experiment.
models_dir: Parent directory containing all experiment folders.
Returns:
A 2D list holding the table cells.
"""
if models is None:
models = DEFAULT_MODELS
if tasks is None:
tasks = DEFAULT_TASKS
model_results = {}
for model_type, desc in models:
model_results[model_type] = {
tname: get_results_for_experiment(
models_dir, tname, model_type, max_npe, desc,
name_prefix=name_prefix, extra_desc=extra_desc
).processed
for tname in tasks}
def info(stats):
return [str(stats['repetitions']),
'%.2f' % stats['success_rate'],
str(int(stats['avg_total_npe']))]
rows = [['max NPE: ' + max_npe]
+ misc.flatten([['{0} ({1})'.format(m, d), '', '']
for m, d in models])]
rows.append(
[''] + misc.flatten([['reps', 'success rate', 'avg NPE']
for _ in models]))
for tname in tasks:
rows.append(
[tname]
+ misc.flatten([info(model_results[model][tname])
for model, _ in models]))
return rows
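# The returned table is laid out roughly as (illustrative values only):
#   ['max NPE: 20M', 'pg (v0)', '', '', 'topk (v0)', '', '', ...]
#   ['', 'reps', 'success rate', 'avg NPE', 'reps', 'success rate', 'avg NPE', ...]
#   ['reverse', '25', '0.84', '1234567', ...]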
def print_results_table(results_table):
"""Print human readable results table to stdout."""
print('')
print('=== Results Table ===')
print('Format: # reps [success rate, avg total NPE]')
def info_str(info_row):
# num_runs (success_rate, avg_total_npe)
if not info_row[0]:
return '0'
return '%s [%s, %s]' % (str(info_row[0]).ljust(2), info_row[1], info_row[2])
nc = len(results_table[0]) # num cols
out_table = [
[results_table[0][0]] + [results_table[0][i] for i in range(1, nc, 3)]]
for row in results_table[2:]:
out_table.append([row[0]] + [info_str(row[i:i+3]) for i in range(1, nc, 3)])
nc = len(out_table[0]) # num cols
col_widths = [max(len(row[col]) for row in out_table) for col in range(nc)]
table_string = ''
for row in out_table:
table_string += ''.join(
[row[c].ljust(col_widths[c] + 2) for c in range(nc)]) + '\n'
print(table_string)
def main(argv):
del argv # Unused.
name_prefix = FLAGS.exp_prefix
print('Experiments prefix: %s' % name_prefix)
model_types = ast.literal_eval(FLAGS.model_types)
if FLAGS.data == 'success_rates':
results_table = make_results_table(
models=model_types, tasks=FLAGS.task_list, max_npe=FLAGS.max_npe,
models_dir=FLAGS.models_dir,
name_prefix=name_prefix, extra_desc='')
with tf.gfile.FastGFile(FLAGS.csv_file, 'w') as f:
f.write(make_csv_string(results_table))
print_results_table(results_table)
else:
# Best code
print('* = experiment is still running')
print('')
print('=== Best Synthesized Code ===')
for model_type, desc in model_types:
print('%s (%s)' % (model_type, desc))
sys.stdout.flush()
for tname in FLAGS.task_list:
res = get_best_code_for_experiment(
FLAGS.models_dir, tname, model_type, FLAGS.max_npe, desc,
name_prefix=name_prefix, extra_desc='')
unfinished_mark = '' if res.finished else ' *'
tname += unfinished_mark
if res.error == BestCodeResultError.success:
print(' %s' % tname)
print(' %s' % res.code)
print(' R=%.6f, NPE=%s' % (res.reward, misc.int_to_si(res.npe)))
elif res.error == BestCodeResultError.experiment_does_not_exist:
print(' Experiment does not exist. Check arguments.')
print(' Experiment folder: %s' % res.folder)
break
else:
print(' %s' % tname)
print(' (none)')
sys.stdout.flush()
if __name__ == '__main__':
app.run(main)