Unverified commit 6b9d5fba authored by Toby Boyd, committed by GitHub

Merge branch 'master' into patch-1

parents 5fd687c5 5fa2a4e6
@@ -18,6 +18,8 @@ This file calls functions to load & pre-process data, construct the TF graph
 and performs training or evaluation as specified by the flag evaluator_job
 Author: aneelakantan (Arvind Neelakantan)
 """
+from __future__ import print_function
+
 import time
 from random import Random
 import numpy as np
@@ -113,9 +115,9 @@ def evaluate(sess, data, batch_size, graph, i):
                                  graph))
     gc += ct * batch_size
     num_examples += batch_size
-  print "dev set accuracy after ", i, " : ", gc / num_examples
-  print num_examples, len(data)
-  print "--------"
+  print("dev set accuracy after ", i, " : ", gc / num_examples)
+  print(num_examples, len(data))
+  print("--------")
 def Train(graph, utility, batch_size, train_data, sess, model_dir,
@@ -142,15 +144,15 @@ def Train(graph, utility, batch_size, train_data, sess, model_dir,
     if (i > 0 and i % FLAGS.eval_cycle == 0):
       end = time.time()
      time_taken = end - start
-      print "step ", i, " ", time_taken, " seconds "
+      print("step ", i, " ", time_taken, " seconds ")
       start = end
-      print " printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle
+      print(" printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle)
       train_set_loss = 0.0
 def master(train_data, dev_data, utility):
   #creates TF graph and calls trainer or evaluator
-  batch_size = utility.FLAGS.batch_size
+  batch_size = utility.FLAGS.batch_size
   model_dir = utility.FLAGS.output_dir + "/model" + utility.FLAGS.job_id + "/"
   #create all paramters of the model
   param_class = parameters.Parameters(utility)
@@ -183,23 +185,23 @@ def master(train_data, dev_data, utility):
       file_list = sorted(selected_models.items(), key=lambda x: x[0])
       if (len(file_list) > 0):
         file_list = file_list[0:len(file_list) - 1]
-      print "list of models: ", file_list
+      print("list of models: ", file_list)
       for model_file in file_list:
         model_file = model_file[1]
-        print "restoring: ", model_file
+        print("restoring: ", model_file)
         saver.restore(sess, model_dir + "/" + model_file)
         model_step = int(
             model_file.split("_")[len(model_file.split("_")) - 1])
-        print "evaluating on dev ", model_file, model_step
+        print("evaluating on dev ", model_file, model_step)
         evaluate(sess, dev_data, batch_size, graph, model_step)
     else:
       ckpt = tf.train.get_checkpoint_state(model_dir)
-      print "model dir: ", model_dir
+      print("model dir: ", model_dir)
       if (not (tf.gfile.IsDirectory(utility.FLAGS.output_dir))):
-        print "create dir: ", utility.FLAGS.output_dir
+        print("create dir: ", utility.FLAGS.output_dir)
         tf.gfile.MkDir(utility.FLAGS.output_dir)
       if (not (tf.gfile.IsDirectory(model_dir))):
-        print "create dir: ", model_dir
+        print("create dir: ", model_dir)
         tf.gfile.MkDir(model_dir)
       Train(graph, utility, batch_size, train_data, sess, model_dir,
             saver)
@@ -225,10 +227,10 @@ def main(args):
   train_data = data_utils.complete_wiki_processing(train_data, utility, True)
   dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
   test_data = data_utils.complete_wiki_processing(test_data, utility, False)
-  print "# train examples ", len(train_data)
-  print "# dev examples ", len(dev_data)
-  print "# test examples ", len(test_data)
-  print "running open source"
+  print("# train examples ", len(train_data))
+  print("# dev examples ", len(dev_data))
+  print("# test examples ", len(test_data))
+  print("running open source")
   #construct TF graph and train or evaluate
   master(train_data, dev_data, utility)
......
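The recurring change in this commit is the Python 2 print-statement to print-function migration. A minimal standalone sketch (hypothetical counter values, not from the repo) of why the `__future__` import makes the new calls safe on both interpreters:

```
# With this __future__ import, print is a function on Python 2.7 as well,
# so the same call syntax runs under both Python 2 and Python 3.
from __future__ import print_function

gc, num_examples, i = 85.0, 100.0, 500  # hypothetical counters
print("dev set accuracy after ", i, " : ", gc / num_examples)
```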
@@ -59,7 +59,7 @@ class Parameters:
     #Biases for the gates and cell
     for bias in ["i", "f", "c", "o"]:
       if (bias == "f"):
-        print "forget gate bias"
+        print("forget gate bias")
         params[key + "_" + bias] = tf.Variable(
             tf.random_uniform([embedding_dims], 1.0, 1.1, self.utility.
                               tf_data_type[self.utility.FLAGS.data_type]))
......
@@ -22,6 +22,8 @@ columns.
 lookup answer (or matrix) is also split into number and word lookup matrix
 Author: aneelakantan (Arvind Neelakantan)
 """
+from __future__ import print_function
+
 import math
 import os
 import re
@@ -56,7 +58,7 @@ def correct_unicode(string):
   #string = re.sub("[“”«»]", "\"", string)
   #string = re.sub("[•†‡]", "", string)
   #string = re.sub("[‐‑–—]", "-", string)
-  string = re.sub(ur'[\u2E00-\uFFFF]', "", string)
+  string = re.sub(r'[\u2E00-\uFFFF]', "", string)
   string = re.sub("\\s+", " ", string).strip()
   return string
@@ -78,7 +80,7 @@ def full_normalize(string):
   # Remove trailing info in brackets
   string = re.sub("\[[^\]]*\]", "", string)
   # Remove most unicode characters in other languages
-  string = re.sub(ur'[\u007F-\uFFFF]', "", string.strip())
+  string = re.sub(r'[\u007F-\uFFFF]', "", string.strip())
   # Remove trailing info in parenthesis
   string = re.sub("\([^)]*\)$", "", string.strip())
   string = final_normalize(string)
@@ -207,7 +209,7 @@ class WikiQuestionGenerator(object):
     self.dev_loader = WikiQuestionLoader(dev_name, root_folder)
     self.test_loader = WikiQuestionLoader(test_name, root_folder)
     self.bad_examples = 0
-    self.root_folder = root_folder
+    self.root_folder = root_folder
     self.data_folder = os.path.join(self.root_folder, "annotated/data")
     self.annotated_examples = {}
     self.annotated_tables = {}
@@ -298,7 +300,7 @@ class WikiQuestionGenerator(object):
             question_id, question, target_canon, context)
         self.annotated_tables[context] = []
         counter += 1
-    print "Annotated examples loaded ", len(self.annotated_examples)
+    print("Annotated examples loaded ", len(self.annotated_examples))
     f.close()
   def is_number_column(self, a):
......
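The `ur''` literals above are a hard syntax error on Python 3, which is why they become plain raw strings. A small sketch (assuming Python 3.3+) of why the rewritten pattern still strips high code points: the raw string leaves the `\uXXXX` sequences intact, and the `re` module expands them itself:

```
# Sketch of the rewritten correct_unicode() pattern on Python 3.
# r'' does not expand \uXXXX, but re does (Python 3.3+), so the character
# class still covers U+2E00 through U+FFFF.
import re

def strip_high_unicode(string):
    return re.sub(r'[\u2E00-\uFFFF]', "", string)

print(strip_high_unicode(u"abc\u4e2ddef"))  # -> abcdef (U+4E2D removed)
```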
@@ -20,6 +20,7 @@ import sys
 import time
 import numpy as np
+from six.moves import xrange
 import tensorflow as tf
 import model as cross_conv_model
@@ -18,6 +18,7 @@ import random
 import sys
 import numpy as np
+from six.moves import xrange
 import tensorflow as tf
......
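Several files in this commit gain the same one-line import. A short illustration (not from the diff) of what it provides: `six.moves.xrange` resolves to `xrange` on Python 2 and to `range` on Python 3, so index loops stay lazy on both interpreters:

```
# six.moves.xrange is xrange on Python 2 and range on Python 3; either way
# it yields indices lazily instead of materializing a list.
from six.moves import xrange

total = 0
for step in xrange(5):
    total += step
print(total)  # 10
```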
@@ -20,6 +20,7 @@ https://arxiv.org/pdf/1607.02586v1.pdf
 import math
 import sys
+from six.moves import xrange
 import tensorflow as tf
 slim = tf.contrib.slim
......
@@ -15,6 +15,7 @@
 """Read image sequence."""
+from six.moves import xrange
 import tensorflow as tf
......
@@ -21,6 +21,7 @@ import sys
 import numpy as np
 import scipy.misc
+from six.moves import xrange
 import tensorflow as tf
......
@@ -96,7 +96,7 @@ def main(_):
     tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
         image_annotations, label_map, encoded_image)
     if tf_example:
-      shard_idx = long(image_id, 16) % FLAGS.num_shards
+      shard_idx = int(image_id, 16) % FLAGS.num_shards
       output_tfrecords[shard_idx].write(tf_example.SerializeToString())
......
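The `long` builtin no longer exists on Python 3, and its `int` is arbitrary precision, so parsing the hex image id and bucketing by modulo is behavior-preserving. A hedged sketch with hypothetical values:

```
# Sharding arithmetic after the long -> int change; the image id here is a
# hypothetical Open Images-style hex string, num_shards a sample value.
num_shards = 10
image_id = "000002b66c9c498e"
shard_idx = int(image_id, 16) % num_shards  # same result long() gave on Py2
print(shard_idx)
```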
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+from six.moves import xrange
 import tensorflow as tf
 from object_detection.core import standard_fields
......
@@ -69,7 +69,7 @@ def freeze_graph_with_def_protos(
   if optimize_graph:
     logging.info('Graph Rewriter optimizations enabled')
     rewrite_options = rewriter_config_pb2.RewriterConfig(
-        optimize_tensor_layout=True)
+        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
     rewrite_options.optimizers.append('pruning')
     rewrite_options.optimizers.append('constfold')
     rewrite_options.optimizers.append('layout')
......
@@ -21,6 +21,7 @@ Example box operations that are supported:
 """
 import numpy as np
+from six.moves import xrange
 from object_detection.utils import np_box_list
 from object_detection.utils import np_box_ops
......
@@ -203,9 +203,9 @@ def padded_one_hot_encoding(indices, depth, left_pad):
     TODO: add runtime checks for depth and indices.
   """
-  if depth < 0 or not isinstance(depth, (int, long) if six.PY2 else int):
+  if depth < 0 or not isinstance(depth, six.integer_types):
     raise ValueError('`depth` must be a non-negative integer.')
-  if left_pad < 0 or not isinstance(left_pad, (int, long) if six.PY2 else int):
+  if left_pad < 0 or not isinstance(left_pad, six.integer_types):
     raise ValueError('`left_pad` must be a non-negative integer.')
   if depth == 0:
     return None
......
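`six.integer_types` is exactly `(int, long)` on Python 2 and `(int,)` on Python 3, so the change above drops the version branch without changing what passes the check. A minimal sketch:

```
# isinstance check without an explicit Python-version branch.
import six

def check_depth(depth):
    if depth < 0 or not isinstance(depth, six.integer_types):
        raise ValueError('`depth` must be a non-negative integer.')
    return depth

check_depth(4)      # passes on Python 2 and 3
# check_depth(4.5)  # raises ValueError: floats are not integer types
```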
@@ -145,7 +145,7 @@ class VisualizationUtilsTest(tf.test.TestCase):
     for i in range(images_with_boxes_np.shape[0]):
       img_name = 'image_' + str(i) + '.png'
       output_file = os.path.join(self.get_temp_dir(), img_name)
-      print 'Writing output image %d to %s' % (i, output_file)
+      print('Writing output image %d to %s' % (i, output_file))
       image_pil = Image.fromarray(images_with_boxes_np[i, ...])
       image_pil.save(output_file)
......
@@ -67,20 +67,27 @@ python trainer.py --logtostderr --batch_size=25 --env=HalfCheetah-v1 \
   --max_divergence=0.05 --value_opt=best_fit --critic_weight=0.0 \
 ```
-Run Mujoco task with Trust-PCL:
+To run a Mujoco task using Trust-PCL (off-policy), use the command below.
+It should work well across all environments, provided you search
+sufficiently over
+(1) max_divergence (0.001, 0.0005, 0.002 are good values),
+(2) rollout (1, 5, 10 are good values), and
+(3) tf_seed (average over enough random seeds).
 ```
 python trainer.py --logtostderr --batch_size=1 --env=HalfCheetah-v1 \
-  --validation_frequency=50 --rollout=10 --critic_weight=0.0 \
-  --gamma=0.995 --clip_norm=40 --learning_rate=0.002 \
-  --replay_buffer_freq=1 --replay_buffer_size=20000 \
-  --replay_buffer_alpha=0.1 --norecurrent --objective=pcl \
-  --max_step=100 --tau=0.0 --eviction=fifo --max_divergence=0.001 \
-  --internal_dim=64 --cutoff_agent=1000 \
-  --replay_batch_size=25 --nouse_online_batch --batch_by_steps \
-  --sample_from=target --value_opt=grad --value_hidden_layers=2 \
-  --update_eps_lambda --unify_episodes --clip_adv=1.0 \
-  --target_network_lag=0.99 --prioritize_by=step
+  --validation_frequency=250 --rollout=1 --critic_weight=1.0 --gamma=0.995 \
+  --clip_norm=40 --learning_rate=0.0001 --replay_buffer_freq=1 \
+  --replay_buffer_size=5000 --replay_buffer_alpha=0.001 --norecurrent \
+  --objective=pcl --max_step=10 --cutoff_agent=1000 --tau=0.0 --eviction=fifo \
+  --max_divergence=0.001 --internal_dim=256 --replay_batch_size=64 \
+  --nouse_online_batch --batch_by_steps --value_hidden_layers=2 \
+  --update_eps_lambda --nounify_episodes --target_network_lag=0.99 \
+  --sample_from=online --clip_adv=1 --prioritize_by=step --num_steps=1000000 \
+  --noinput_prev_actions --use_target_values --tf_seed=57
 ```
 Run Mujoco task with PCL constraint trust region:
......
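The updated README asks for a search over max_divergence, rollout, and tf_seed. A hedged sketch (hypothetical sweep script, not part of the repo) of how that search could be driven, with the remaining flags elided:

```
# Hypothetical sweep over the three knobs the README highlights; the value
# lists come from the README text, everything else is illustrative.
import itertools
import subprocess

max_divergences = [0.0005, 0.001, 0.002]
rollouts = [1, 5, 10]
seeds = range(5)  # average results over several random seeds

for divergence, rollout, seed in itertools.product(
        max_divergences, rollouts, seeds):
    subprocess.check_call([
        "python", "trainer.py", "--logtostderr", "--batch_size=1",
        "--env=HalfCheetah-v1", "--objective=pcl",
        "--max_divergence=%s" % divergence,
        "--rollout=%d" % rollout,
        "--tf_seed=%d" % seed,
        # remaining flags as in the README command above
    ])
```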
@@ -20,6 +20,7 @@ In some cases this is just an additional linear layer on the policy.
 In other cases, it is a completely separate neural network.
 """
+from six.moves import xrange
 import tensorflow as tf
 import numpy as np
......
@@ -20,6 +20,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+from six.moves import xrange
 import tensorflow as tf
 import numpy as np
 import pickle
@@ -109,13 +110,14 @@ class Controller(object):
     self.episode_running_rewards = np.zeros(len(self.env))
     self.episode_running_lengths = np.zeros(len(self.env))
     self.episode_rewards = []
+    self.greedy_episode_rewards = []
     self.episode_lengths = []
     self.total_rewards = []
     self.best_batch_rewards = None
-  def setup(self):
-    self.model.setup()
+  def setup(self, train=True):
+    self.model.setup(train=train)
   def initial_internal_state(self):
     return np.zeros(self.model.policy.rnn_state_dim)
@@ -187,7 +189,7 @@ class Controller(object):
     return initial_state, all_obs, all_act, rewards, all_pad
-  def sample_episodes(self, sess):
+  def sample_episodes(self, sess, greedy=False):
     """Sample steps from the environment until we have enough for a batch."""
     # check if last batch ended with episode that was not terminated
@@ -200,7 +202,7 @@ class Controller(object):
     while total_steps < self.max_step * len(self.env):
       (initial_state,
        observations, actions, rewards,
-       pads) = self._sample_episodes(sess)
+       pads) = self._sample_episodes(sess, greedy=greedy)
       observations = zip(*observations)
       actions = zip(*actions)
@@ -249,19 +251,26 @@ class Controller(object):
                   observations, initial_state, actions,
                   rewards, terminated, pads):
     """Train model using batch."""
+    avg_episode_reward = np.mean(self.episode_rewards)
+    greedy_episode_reward = (np.mean(self.greedy_episode_rewards)
+                             if self.greedy_episode_rewards else
+                             avg_episode_reward)
     loss, summary = None, None
     if self.use_trust_region:
       # use trust region to optimize policy
       loss, _, summary = self.model.trust_region_step(
           sess,
           observations, initial_state, actions,
           rewards, terminated, pads,
-          avg_episode_reward=np.mean(self.episode_rewards))
+          avg_episode_reward=avg_episode_reward,
+          greedy_episode_reward=greedy_episode_reward)
     else:  # otherwise use simple gradient descent on policy
       loss, _, summary = self.model.train_step(
           sess,
           observations, initial_state, actions,
           rewards, terminated, pads,
-          avg_episode_reward=np.mean(self.episode_rewards))
+          avg_episode_reward=avg_episode_reward,
+          greedy_episode_reward=greedy_episode_reward)
     if self.use_value_opt:  # optionally perform specific value optimization
       self.model.fit_values(
@@ -305,7 +314,8 @@ class Controller(object):
     if self.update_eps_lambda:
       episode_rewards = np.array(self.episode_rewards)
       episode_lengths = np.array(self.episode_lengths)
-      eps_lambda = find_best_eps_lambda(episode_rewards, episode_lengths)
+      eps_lambda = find_best_eps_lambda(
+          episode_rewards[-20:], episode_lengths[-20:])
       sess.run(self.model.objective.assign_eps_lambda,
                feed_dict={self.model.objective.new_eps_lambda: eps_lambda})
@@ -328,10 +338,10 @@ class Controller(object):
     """Use greedy sampling."""
     (initial_state,
      observations, actions, rewards,
-     pads) = self._sample_episodes(sess, greedy=True)
+     pads, terminated) = self.sample_episodes(sess, greedy=True)
     total_rewards = np.sum(np.array(rewards) * (1 - np.array(pads)), axis=0)
-    return np.mean(total_rewards)
+    return total_rewards, self.episode_rewards
   def convert_from_batched_episodes(
       self, initial_state, observations, actions, rewards,
@@ -351,7 +361,7 @@ class Controller(object):
     for i in xrange(num_episodes):
       length = total_length[i]
       ep_initial = initial_state[i]
-      ep_obs = [obs[:length, i, ...] for obs in observations]
+      ep_obs = [obs[:length + 1, i, ...] for obs in observations]
       ep_act = [act[:length + 1, i, ...] for act in actions]
       ep_rewards = rewards[:length, i]
......
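The Controller changes above thread a greedy-episode reward through the training step, falling back to the on-policy average until any greedy episodes have been collected. A standalone sketch (hypothetical helper, mirroring the diff) of that fallback:

```
# Fallback logic mirroring the new avg/greedy reward computation in
# Controller.train (hypothetical standalone helper, not from the repo).
import numpy as np

def episode_reward_summary(episode_rewards, greedy_episode_rewards):
    avg_episode_reward = np.mean(episode_rewards)
    greedy_episode_reward = (np.mean(greedy_episode_rewards)
                             if greedy_episode_rewards else
                             avg_episode_reward)
    return avg_episode_reward, greedy_episode_reward

print(episode_reward_summary([1.0, 3.0], []))     # (2.0, 2.0)
print(episode_reward_summary([1.0, 3.0], [5.0]))  # (2.0, 5.0)
```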
@@ -20,6 +20,7 @@ from __future__ import division
 from __future__ import print_function
 import numpy as np
+from six.moves import xrange
 class spaces(object):
......
@@ -22,6 +22,7 @@ import tensorflow as tf
 import random
 import os
 import numpy as np
+from six.moves import xrange
 import pickle
 gfile = tf.gfile
......