Commit a00f7e2b authored by Dan Abolafia

Open source release of Brain Coder.

parent 54babf62
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

r"""After running tuning, use this script to aggregate the results.

Usage:

OUT_DIR="<my_tuning_dir>"
bazel run -c opt single_task:aggregate_tuning_results -- \
    --alsologtostderr \
    --tuning_dir="$OUT_DIR"
"""

import ast
import os

from absl import app
from absl import flags
import tensorflow as tf

FLAGS = flags.FLAGS
flags.DEFINE_string(
    'tuning_dir', '',
    'Absolute path where tuning trial result folders are found.')


def main(argv):
  del argv  # Unused.

  try:
    trial_dirs = tf.gfile.ListDirectory(FLAGS.tuning_dir)
  except tf.errors.NotFoundError:
    print('Tuning directory %s does not exist.' % (FLAGS.tuning_dir,))
    return

  metrics = []
  for trial_dir in trial_dirs:
    tuning_results_file = os.path.join(
        FLAGS.tuning_dir, trial_dir, 'tuning_results.txt')
    if tf.gfile.Exists(tuning_results_file):
      with tf.gfile.FastGFile(tuning_results_file, 'r') as reader:
        for line in reader:
          metrics.append(ast.literal_eval(line.replace(': nan,', ': 0.0,')))

  if not metrics:
    print('No trials found.')
    return

  num_trials = [m['num_trials'] for m in metrics]
  assert all(n == num_trials[0] for n in num_trials)
  num_trials = num_trials[0]
  print('Found %d completed trials out of %d' % (len(metrics), num_trials))

  # Sort by objective descending.
  sorted_trials = sorted(metrics, key=lambda m: -m['objective'])
  for i, metrics in enumerate(sorted_trials):
    hparams = metrics['hparams']
    keys = sorted(hparams.keys())
    print(
        str(i).ljust(4) + ': '
        + '{0:.2f}'.format(metrics['objective']).ljust(10)
        + '['
        + ','.join(['{}={}'.format(k, hparams[k]).ljust(24) for k in keys])
        + ']')


if __name__ == '__main__':
  app.run(main)
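For reference, a minimal sketch (not part of the commit) of how the aggregator consumes a single line of tuning_results.txt. The literal below is hypothetical; the only assumptions are the keys the script actually reads ('num_trials', 'objective', 'hparams') and that each line is a Python dict literal, which follows from the per-line ast.literal_eval above.

import ast

# Hypothetical line from <tuning_dir>/<trial_dir>/tuning_results.txt. 'nan'
# is not a valid Python literal, so the script rewrites ': nan,' to ': 0.0,'
# before parsing.
line = "{'num_trials': 30, 'objective': nan, 'hparams': {'lr': 5e-05}}\n"
metrics = ast.literal_eval(line.replace(': nan,', ': 0.0,'))
print(metrics['objective'], metrics['hparams'])  # -> 0.0 {'lr': 5e-05}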
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Tests for code_tasks."""

import numpy as np
import tensorflow as tf

from single_task import code_tasks  # brain coder
from single_task import defaults  # brain coder


def pad(string, pad_length, pad_char):
  return string + pad_char * (pad_length - len(string))


class CodeTasksTest(tf.test.TestCase):

  def assertClose(self, a, b):
    self.assertTrue(
        np.isclose(a, b, atol=1e-4),
        'Expecting approximately equal values. Got: %s, %s' % (a, b))

  def testMultiIOTaskManager(self):
    maxlen = 100
    padchr = '['

    task = code_tasks.make_paper_task(
        'print', timestep_limit=maxlen, do_code_simplification=False)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r(pad('++++++++.---.+++++++...', maxlen, padchr)).episode_rewards[-1],
        0.2444)
    self.assertClose(
        r(pad('++++++++.---.+++++++..+++.',
              maxlen, padchr)).episode_rewards[-1],
        1.0)

    task = code_tasks.make_paper_task(
        'print', timestep_limit=maxlen, do_code_simplification=True)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r('++++++++.---.+++++++...').episode_rewards[-1],
        0.2444)
    self.assertClose(
        r('++++++++.---.+++++++..+++.').episode_rewards[-1],
        0.935)
    self.assertClose(
        r(pad('++++++++.---.+++++++..+++.',
              maxlen, padchr)).episode_rewards[-1],
        0.75)

    task = code_tasks.make_paper_task(
        'reverse', timestep_limit=maxlen, do_code_simplification=False)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r(pad('>,>,>,.<.<.<.', maxlen, padchr)).episode_rewards[-1],
        0.1345)
    self.assertClose(
        r(pad(',[>,]+[,<.]', maxlen, padchr)).episode_rewards[-1],
        1.0)

    task = code_tasks.make_paper_task(
        'reverse', timestep_limit=maxlen, do_code_simplification=True)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(r('>,>,>,.<.<.<.').episode_rewards[-1], 0.1324)
    self.assertClose(r(',[>,]+[,<.]').episode_rewards[-1], 0.9725)
    self.assertClose(
        r(pad(',[>,]+[,<.]', maxlen, padchr)).episode_rewards[-1],
        0.75)

  def testMakeTask(self):
    maxlen = 100
    padchr = '['
    config = defaults.default_config_with_updates(
        'env=c(config_for_iclr=False,fixed_string=[8,5,12,12,15])')
    task = code_tasks.make_task(config.env, 'print', timestep_limit=maxlen)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r('++++++++.---.+++++++...').episode_rewards[-1],
        0.2444)
    self.assertClose(
        r('++++++++.---.+++++++..+++.').episode_rewards[-1],
        0.935)
    self.assertClose(
        r(pad('++++++++.---.+++++++..+++.',
              maxlen, padchr)).episode_rewards[-1],
        0.75)

  def testKnownCodeBaseTask(self):
    maxlen = 100
    padchr = '['
    task = code_tasks.make_paper_task(
        'shift-left', timestep_limit=maxlen, do_code_simplification=False)
    reward_fns = task.rl_batch(1)
    r = reward_fns[0]
    self.assertClose(
        r(pad(',>,[.,]<.,.', maxlen, padchr)).episode_rewards[-1],
        1.0)


if __name__ == '__main__':
  tf.test.main()
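A brief aside on the magic constants above: the following hand trace (not part of the commit) shows why '++++++++.---.+++++++..+++.' earns full reward on the 'print' task, assuming the paper task targets the same [8, 5, 12, 12, 15] string that testMakeTask passes as fixed_string; the identical reward values in both tests suggest that is the case.

# Simulate the BF program '++++++++.---.+++++++..+++.': each run of '+'/'-'
# adjusts the current cell, and each '.' outputs the cell's value.
outputs = []
cell = 0
for delta, num_prints in [(8, 1), (-3, 1), (7, 2), (3, 1)]:
  cell += delta
  outputs.extend([cell] * num_prints)
assert outputs == [8, 5, 12, 12, 15]
# The shorter program '++++++++.---.+++++++...' outputs 12 as its final value
# instead of 15, which is why it only receives partial reward (0.2444) above.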
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Manage data for pretraining and RL tasks."""

import ast
from collections import namedtuple

from absl import logging

from single_task import code_tasks  # brain coder


RLBatch = namedtuple('RLBatch', ['reward_fns', 'batch_size', 'good_reward'])


class DataManager(object):
  """Interface between environment and model."""

  def __init__(self, global_config, run_number=None,
               do_code_simplification=False):
    """Constructs a DataManager.

    Args:
      global_config: A config_lib.Config instance containing all config. See
          config in defaults.py.
      run_number: Which run this is (of the same experiment). This should be
          set when a task cycle is defined in the config. A task cycle is a
          list of tasks to cycle through repeatedly, and the selected task is
          a function of the run number, i.e. 0-th run, 1-st run, 2-nd run,
          etc. This can be None if only a single task is set in the config.
      do_code_simplification: When global_config.env.config_for_iclr is True,
          use this option to create code simplification (code golf) tasks
          instead of fixed-length coding tasks. If True, a task with code
          simplification reward will be constructed.

    Raises:
      ValueError: If global_config.env.task and global_config.env.task_cycle
          are both set, or both not set. Only one should be given.
      ValueError: If global_config.env.task_cycle is set but run_number is
          None.
    """
    env_config = global_config.env
    self.batch_size = global_config.batch_size

    if env_config.task_cycle:
      if env_config.task:
        raise ValueError('Do not set both `task` and `task_cycle`.')
      if run_number is None:
        raise ValueError('Do not use task_cycle for single-run experiment.')
      index = run_number % len(env_config.task_cycle)
      self.task_name = env_config.task_cycle[index]
      logging.info('run_number: %d, task_cycle index: %d', run_number, index)
      logging.info('task_cycle: %s', env_config.task_cycle)
    elif env_config.task:
      self.task_name = env_config.task
    else:
      raise ValueError('Either `task` or `task_cycle` must be set.')
    logging.info('Task for this run: "%s"', self.task_name)

    logging.info('config_for_iclr=True; do_code_simplification=%s',
                 do_code_simplification)
    self.rl_task = code_tasks.make_task(
        task_name=self.task_name,
        override_kwargs=ast.literal_eval(env_config.task_kwargs),
        max_code_length=global_config.timestep_limit,
        require_correct_syntax=env_config.correct_syntax,
        do_code_simplification=do_code_simplification,
        correct_bonus=env_config.task_manager_config.correct_bonus,
        code_length_bonus=env_config.task_manager_config.code_length_bonus)

  def sample_rl_batch(self):
    """Create reward functions from the current task.

    Returns:
      RLBatch namedtuple instance, which holds functions and information for
      a minibatch of episodes.
      * reward_fns: A reward function for each episode. Maps code string to
          reward.
      * batch_size: Number of episodes in this minibatch.
      * good_reward: Estimated threshold of rewards which indicate the
          algorithm is starting to solve the task. This is a heuristic that
          tries to reduce the amount of data written to disk.
    """
    reward_fns = self.rl_task.rl_batch(self.batch_size)
    return RLBatch(
        reward_fns=reward_fns,
        batch_size=self.batch_size,
        good_reward=self.rl_task.good_reward)
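A hedged usage sketch of the class above. It assumes this file lands as single_task/data.py (so it imports as single_task.data), that the default config from defaults.py supplies env, batch_size, and timestep_limit as shown in that file, and that the reward functions return episode results with an episode_rewards list, as the tests above read.

from single_task import data      # brain coder
from single_task import defaults  # brain coder

# Name a single task in the config, then let DataManager build the reward
# functions for one minibatch of episodes.
config = defaults.default_config_with_updates("env=c(task='reverse')")
manager = data.DataManager(config, run_number=None)
batch = manager.sample_rl_batch()
# Each reward_fn maps a code string to an episode result.
result = batch.reward_fns[0](',[>,]+[,<.]')
print(result.episode_rewards[-1], 'good reward threshold:', batch.good_reward)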
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Default configuration for agent and environment."""

from absl import logging

from common import config_lib  # brain coder


def default_config():
  return config_lib.Config(
      agent=config_lib.OneOf(
          [config_lib.Config(
              algorithm='pg',
              policy_lstm_sizes=[35, 35],
              # Set value_lstm_sizes to None to share weights with policy.
              value_lstm_sizes=[35, 35],
              obs_embedding_size=10,
              grad_clip_threshold=10.0,
              param_init_factor=1.0,
              lr=5e-5,
              pi_loss_hparam=1.0,
              vf_loss_hparam=0.5,
              entropy_beta=1e-2,
              regularizer=0.0,
              softmax_tr=1.0,  # Reciprocal temperature.
              optimizer='rmsprop',  # 'adam', 'sgd', 'rmsprop'
              topk=0,  # Top-k unique codes will be stored.
              topk_loss_hparam=0.0,  # Off-policy loss multiplier.
              # Uniformly sample this many episodes from the topk buffer per
              # batch. If topk is 0, this has no effect.
              topk_batch_size=1,
              # Exponential moving average baseline for REINFORCE.
              # If zero, A2C is used.
              # If non-zero, should be close to 1, like .99, .999, etc.
              ema_baseline_decay=0.99,
              # Whether the agent can emit an EOS token. If True, the agent
              # can emit an EOS token which ends the episode early (ends the
              # sequence). If False, the agent must emit tokens until the
              # timestep limit is reached, i.e. True means variable-length
              # code, False means fixed-length code.
              # WARNING: Making this False slows things down.
              eos_token=False,
              replay_temperature=1.0,
              # Replay probability. 1 = always replay, 0 = always on policy.
              alpha=0.0,
              # Whether to normalize importance weights in each minibatch.
              iw_normalize=True),
           config_lib.Config(
               algorithm='ga',
               crossover_rate=0.99,
               mutation_rate=0.086),
           config_lib.Config(
               algorithm='rand')],
          algorithm='pg',
      ),
      env=config_lib.Config(
          # If True, task-specific settings are not needed.
          task='',  # 'print', 'echo', 'reverse', 'remove', ...
          task_cycle=[],  # If non-empty, repetitions will cycle through tasks.
          task_kwargs='{}',  # Python dict literal.
          task_manager_config=config_lib.Config(
              # Reward received per test case. These bonuses will be scaled
              # based on how many test cases there are.
              correct_bonus=2.0,  # Bonus for code getting correct answer.
              code_length_bonus=1.0),  # Maximum bonus for short code.
          correct_syntax=False,
      ),
      batch_size=64,
      timestep_limit=32)


def default_config_with_updates(config_string, do_logging=True):
  """Returns the default config updated from the given config string."""
  if do_logging:
    logging.info('Config string: "%s"', config_string)
  config = default_config()
  config.strict_update(config_lib.Config.parse(config_string))
  if do_logging:
    logging.info('Config:\n%s', config.pretty_str())
  return config
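For completeness, a small hedged example of the override mechanism. The comma-separated "key=c(...)" config-string syntax is the same one exercised by code_tasks_test.py above; the exact grammar is defined by config_lib.Config.parse, so treat the string below as an illustration rather than a spec.

# Override a nested env field and a top-level scalar in one string, then read
# the merged values back. Unknown keys would be rejected by strict_update.
config = default_config_with_updates(
    "env=c(task='print'),batch_size=32", do_logging=False)
assert config.env.task == 'print'
assert config.batch_size == 32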