Commit a00f7e2b authored by Dan Abolafia

Open source release of Brain Coder.

parent 54babf62
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

r"""After running tuning, use this script to aggregate the results.

Usage:

OUT_DIR="<my_tuning_dir>"
bazel run -c opt single_task:aggregate_tuning_results -- \
    --alsologtostderr \
    --tuning_dir="$OUT_DIR"
"""

import ast
import os

from absl import app
from absl import flags
import tensorflow as tf

FLAGS = flags.FLAGS
flags.DEFINE_string(
    'tuning_dir', '',
    'Absolute path where tuning trial result folders are found.')


def main(argv):
  del argv  # Unused.

  try:
    trial_dirs = tf.gfile.ListDirectory(FLAGS.tuning_dir)
  except tf.errors.NotFoundError:
    print('Tuning directory %s does not exist.' % (FLAGS.tuning_dir,))
    return

  metrics = []
  for trial_dir in trial_dirs:
    tuning_results_file = os.path.join(
        FLAGS.tuning_dir, trial_dir, 'tuning_results.txt')
    if tf.gfile.Exists(tuning_results_file):
      with tf.gfile.FastGFile(tuning_results_file, 'r') as reader:
        for line in reader:
          metrics.append(ast.literal_eval(line.replace(': nan,', ': 0.0,')))

  if not metrics:
    print('No trials found.')
    return

  num_trials = [m['num_trials'] for m in metrics]
  assert all(n == num_trials[0] for n in num_trials)
  num_trials = num_trials[0]
  print('Found %d completed trials out of %d' % (len(metrics), num_trials))

  # Sort by objective descending.
  sorted_trials = sorted(metrics, key=lambda m: -m['objective'])
  for i, metrics in enumerate(sorted_trials):
    hparams = metrics['hparams']
    keys = sorted(hparams.keys())
    print(
        str(i).ljust(4) + ': '
        + '{0:.2f}'.format(metrics['objective']).ljust(10)
        + '['
        + ','.join(['{}={}'.format(k, hparams[k]).ljust(24) for k in keys])
        + ']')


if __name__ == '__main__':
  app.run(main)
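For reference, a minimal sketch (not part of the commit) of how the aggregator consumes a single line of tuning_results.txt. The literal below is hypothetical; the only assumptions are the keys the script actually reads ('num_trials', 'objective', 'hparams') and that each line is a Python dict literal, which follows from the per-line ast.literal_eval above.

import ast

# Hypothetical line from <tuning_dir>/<trial_dir>/tuning_results.txt. 'nan'
# is not a valid Python literal, so the script rewrites ': nan,' to ': 0.0,'
# before parsing.
line = "{'num_trials': 30, 'objective': nan, 'hparams': {'lr': 5e-05}}\n"
metrics = ast.literal_eval(line.replace(': nan,', ': 0.0,'))
print(metrics['objective'], metrics['hparams'])  # -> 0.0 {'lr': 5e-05}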
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Tests for code_tasks."""

import numpy as np
import tensorflow as tf

from single_task import code_tasks  # brain coder
from single_task import defaults  # brain coder


def pad(string, pad_length, pad_char):
  return string + pad_char * (pad_length - len(string))


class CodeTasksTest(tf.test.TestCase):

  def assertClose(self, a, b):
    self.assertTrue(
        np.isclose(a, b, atol=1e-4),
        'Expecting approximately equal values. Got: %s, %s' % (a, b))

  def testMultiIOTaskManager(self):
    maxlen = 100
    padchr = '['

    task = code_tasks.make_paper_task(
        'print', timestep_limit=maxlen, do_code_simplification=False)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r(pad('++++++++.---.+++++++...', maxlen, padchr)).episode_rewards[-1],
        0.2444)
    self.assertClose(
        r(pad('++++++++.---.+++++++..+++.',
              maxlen, padchr)).episode_rewards[-1],
        1.0)

    task = code_tasks.make_paper_task(
        'print', timestep_limit=maxlen, do_code_simplification=True)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r('++++++++.---.+++++++...').episode_rewards[-1],
        0.2444)
    self.assertClose(
        r('++++++++.---.+++++++..+++.').episode_rewards[-1],
        0.935)
    self.assertClose(
        r(pad('++++++++.---.+++++++..+++.',
              maxlen, padchr)).episode_rewards[-1],
        0.75)

    task = code_tasks.make_paper_task(
        'reverse', timestep_limit=maxlen, do_code_simplification=False)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r(pad('>,>,>,.<.<.<.', maxlen, padchr)).episode_rewards[-1],
        0.1345)
    self.assertClose(
        r(pad(',[>,]+[,<.]', maxlen, padchr)).episode_rewards[-1],
        1.0)

    task = code_tasks.make_paper_task(
        'reverse', timestep_limit=maxlen, do_code_simplification=True)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(r('>,>,>,.<.<.<.').episode_rewards[-1], 0.1324)
    self.assertClose(r(',[>,]+[,<.]').episode_rewards[-1], 0.9725)
    self.assertClose(
        r(pad(',[>,]+[,<.]', maxlen, padchr)).episode_rewards[-1],
        0.75)

  def testMakeTask(self):
    maxlen = 100
    padchr = '['
    config = defaults.default_config_with_updates(
        'env=c(config_for_iclr=False,fixed_string=[8,5,12,12,15])')
    task = code_tasks.make_task(config.env, 'print', timestep_limit=maxlen)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r('++++++++.---.+++++++...').episode_rewards[-1],
        0.2444)
    self.assertClose(
        r('++++++++.---.+++++++..+++.').episode_rewards[-1],
        0.935)
    self.assertClose(
        r(pad('++++++++.---.+++++++..+++.',
              maxlen, padchr)).episode_rewards[-1],
        0.75)

  def testKnownCodeBaseTask(self):
    maxlen = 100
    padchr = '['
    task = code_tasks.make_paper_task(
        'shift-left', timestep_limit=maxlen, do_code_simplification=False)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r(pad(',>,[.,]<.,.', maxlen, padchr)).episode_rewards[-1],
        1.0)


if __name__ == '__main__':
  tf.test.main()
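A brief aside on the magic constants above: the following hand trace (not part of the commit) shows why '++++++++.---.+++++++..+++.' earns full reward on the 'print' task, assuming the paper task targets the same [8, 5, 12, 12, 15] string that testMakeTask passes as fixed_string; the identical reward values in both tests suggest that is the case.

# Simulate the BF program '++++++++.---.+++++++..+++.': each run of '+'/'-'
# adjusts the current cell, and each '.' outputs the cell's value.
outputs = []
cell = 0
for delta, num_prints in [(8, 1), (-3, 1), (7, 2), (3, 1)]:
  cell += delta
  outputs.extend([cell] * num_prints)
assert outputs == [8, 5, 12, 12, 15]
# The shorter program '++++++++.---.+++++++...' outputs 12 as its final value
# instead of 15, which is why it only receives partial reward (0.2444) above.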
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Manage data for pretraining and RL tasks."""

import ast
from collections import namedtuple

from absl import logging

from single_task import code_tasks  # brain coder


RLBatch = namedtuple('RLBatch', ['reward_fns', 'batch_size', 'good_reward'])


class DataManager(object):
  """Interface between environment and model."""

  def __init__(self, global_config, run_number=None,
               do_code_simplification=False):
    """Constructs a DataManager.

    Args:
      global_config: A config_lib.Config instance containing all config. See
          config in defaults.py.
      run_number: Which run this is (of the same experiment). This should be
          set when a task cycle is defined in the config. A task cycle is a
          list of tasks to cycle through repeatedly, and the selected task is
          a function of the run number, i.e. 0-th run, 1-st run, 2-nd run,
          etc. This can be None if only a single task is set in the config.
      do_code_simplification: When global_config.env.config_for_iclr is True,
          use this option to create code simplification (code golf) tasks
          instead of fixed-length coding tasks. If True, a task with code
          simplification reward will be constructed.

    Raises:
      ValueError: If global_config.env.task and global_config.env.task_cycle
          are both set, or both not set. Only one should be given.
      ValueError: If global_config.env.task_cycle is set but run_number is
          None.
    """
    env_config = global_config.env
    self.batch_size = global_config.batch_size

    if env_config.task_cycle:
      if env_config.task:
        raise ValueError('Do not set both `task` and `task_cycle`.')
      if run_number is None:
        raise ValueError('Do not use task_cycle for single-run experiment.')
      index = run_number % len(env_config.task_cycle)
      self.task_name = env_config.task_cycle[index]
      logging.info('run_number: %d, task_cycle index: %d', run_number, index)
      logging.info('task_cycle: %s', env_config.task_cycle)
    elif env_config.task:
      self.task_name = env_config.task
    else:
      raise ValueError('Either `task` or `task_cycle` must be set.')
    logging.info('Task for this run: "%s"', self.task_name)

    logging.info('config_for_iclr=True; do_code_simplification=%s',
                 do_code_simplification)
    self.rl_task = code_tasks.make_task(
        task_name=self.task_name,
        override_kwargs=ast.literal_eval(env_config.task_kwargs),
        max_code_length=global_config.timestep_limit,
        require_correct_syntax=env_config.correct_syntax,
        do_code_simplification=do_code_simplification,
        correct_bonus=env_config.task_manager_config.correct_bonus,
        code_length_bonus=env_config.task_manager_config.code_length_bonus)

  def sample_rl_batch(self):
    """Create reward functions from the current task.

    Returns:
      RLBatch namedtuple instance, which holds functions and information for
      a minibatch of episodes.
      * reward_fns: A reward function for each episode. Maps code string to
          reward.
      * batch_size: Number of episodes in this minibatch.
      * good_reward: Estimated threshold of rewards which indicate the
          algorithm is starting to solve the task. This is a heuristic that
          tries to reduce the amount of data written to disk.
    """
    reward_fns = self.rl_task.rl_batch(self.batch_size)
    return RLBatch(
        reward_fns=reward_fns,
        batch_size=self.batch_size,
        good_reward=self.rl_task.good_reward)
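A hedged usage sketch of the class above. It assumes this file lands as single_task/data.py (so it imports as single_task.data), that the default config from defaults.py supplies env, batch_size, and timestep_limit as shown in that file, and that the reward functions return episode results with an episode_rewards list, as the tests above read.

from single_task import data      # brain coder
from single_task import defaults  # brain coder

# Name a single task in the config, then let DataManager build the reward
# functions for one minibatch of episodes.
config = defaults.default_config_with_updates("env=c(task='reverse')")
manager = data.DataManager(config, run_number=None)
batch = manager.sample_rl_batch()
# Each reward_fn maps a code string to an episode result.
result = batch.reward_fns[0](',[>,]+[,<.]')
print(result.episode_rewards[-1], 'good reward threshold:', batch.good_reward)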
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Default configuration for agent and environment."""

from absl import logging

from common import config_lib  # brain coder


def default_config():
  return config_lib.Config(
      agent=config_lib.OneOf(
          [config_lib.Config(
              algorithm='pg',
              policy_lstm_sizes=[35, 35],
              # Set value_lstm_sizes to None to share weights with policy.
              value_lstm_sizes=[35, 35],
              obs_embedding_size=10,
              grad_clip_threshold=10.0,
              param_init_factor=1.0,
              lr=5e-5,
              pi_loss_hparam=1.0,
              vf_loss_hparam=0.5,
              entropy_beta=1e-2,
              regularizer=0.0,
              softmax_tr=1.0,  # Reciprocal temperature.
              optimizer='rmsprop',  # 'adam', 'sgd', 'rmsprop'
              topk=0,  # Top-k unique codes will be stored.
              topk_loss_hparam=0.0,  # Off-policy loss multiplier.
              # Uniformly sample this many episodes from the topk buffer per
              # batch. If topk is 0, this has no effect.
              topk_batch_size=1,
              # Exponential moving average baseline for REINFORCE.
              # If zero, A2C is used.
              # If non-zero, should be close to 1, like .99, .999, etc.
              ema_baseline_decay=0.99,
              # Whether the agent can emit an EOS token. If True, the agent
              # can emit an EOS token which ends the episode early (ends the
              # sequence). If False, the agent must emit tokens until the
              # timestep limit is reached, i.e. True means variable-length
              # code, False means fixed-length code.
              # WARNING: Making this False slows things down.
              eos_token=False,
              replay_temperature=1.0,
              # Replay probability. 1 = always replay, 0 = always on policy.
              alpha=0.0,
              # Whether to normalize importance weights in each minibatch.
              iw_normalize=True),
           config_lib.Config(
               algorithm='ga',
               crossover_rate=0.99,
               mutation_rate=0.086),
           config_lib.Config(
               algorithm='rand')],
          algorithm='pg',
      ),
      env=config_lib.Config(
          # If True, task-specific settings are not needed.
          task='',  # 'print', 'echo', 'reverse', 'remove', ...
          task_cycle=[],  # If non-empty, repetitions will cycle through tasks.
          task_kwargs='{}',  # Python dict literal.
          task_manager_config=config_lib.Config(
              # Reward received per test case. These bonuses will be scaled
              # based on how many test cases there are.
              correct_bonus=2.0,  # Bonus for code getting correct answer.
              code_length_bonus=1.0),  # Maximum bonus for short code.
          correct_syntax=False,
      ),
      batch_size=64,
      timestep_limit=32)


def default_config_with_updates(config_string, do_logging=True):
  """Returns the default config updated from the given config string."""
  if do_logging:
    logging.info('Config string: "%s"', config_string)
  config = default_config()
  config.strict_update(config_lib.Config.parse(config_string))
  if do_logging:
    logging.info('Config:\n%s', config.pretty_str())
  return config
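For completeness, a small hedged example of the override mechanism. The comma-separated "key=c(...)" config-string syntax is the same one exercised by code_tasks_test.py above; the exact grammar is defined by config_lib.Config.parse, so treat the string below as an illustration rather than a spec.

# Override a nested env field and a top-level scalar in one string, then read
# the merged values back. Unknown keys would be rejected by strict_update.
config = default_config_with_updates(
    "env=c(task='print'),batch_size=32", do_logging=False)
assert config.env.task == 'print'
assert config.batch_size == 32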