"tests/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "283b597a6eee445f39f254469910caa8b91d3a8f"
Unverified Commit 6b9d5fba authored by Toby Boyd, committed by GitHub

Merge branch 'master' into patch-1

parents 5fd687c5 5fa2a4e6
@@ -18,6 +18,8 @@ This file calls functions to load & pre-process data, construct the TF graph
and performs training or evaluation as specified by the flag evaluator_job
Author: aneelakantan (Arvind Neelakantan)
"""
+from __future__ import print_function
import time
from random import Random
import numpy as np
@@ -113,9 +115,9 @@ def evaluate(sess, data, batch_size, graph, i):
graph))
gc += ct * batch_size
num_examples += batch_size
-print "dev set accuracy after ", i, " : ", gc / num_examples
+print("dev set accuracy after ", i, " : ", gc / num_examples)
-print num_examples, len(data)
+print(num_examples, len(data))
-print "--------"
+print("--------")
def Train(graph, utility, batch_size, train_data, sess, model_dir,
@@ -142,15 +144,15 @@ def Train(graph, utility, batch_size, train_data, sess, model_dir,
if (i > 0 and i % FLAGS.eval_cycle == 0):
end = time.time()
time_taken = end - start
-print "step ", i, " ", time_taken, " seconds "
+print("step ", i, " ", time_taken, " seconds ")
start = end
-print " printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle
+print(" printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle)
train_set_loss = 0.0
def master(train_data, dev_data, utility):
#creates TF graph and calls trainer or evaluator
batch_size = utility.FLAGS.batch_size
model_dir = utility.FLAGS.output_dir + "/model" + utility.FLAGS.job_id + "/"
#create all paramters of the model
param_class = parameters.Parameters(utility)
@@ -183,23 +185,23 @@ def master(train_data, dev_data, utility):
file_list = sorted(selected_models.items(), key=lambda x: x[0])
if (len(file_list) > 0):
file_list = file_list[0:len(file_list) - 1]
-print "list of models: ", file_list
+print("list of models: ", file_list)
for model_file in file_list:
model_file = model_file[1]
-print "restoring: ", model_file
+print("restoring: ", model_file)
saver.restore(sess, model_dir + "/" + model_file)
model_step = int(
model_file.split("_")[len(model_file.split("_")) - 1])
-print "evaluating on dev ", model_file, model_step
+print("evaluating on dev ", model_file, model_step)
evaluate(sess, dev_data, batch_size, graph, model_step)
else:
ckpt = tf.train.get_checkpoint_state(model_dir)
-print "model dir: ", model_dir
+print("model dir: ", model_dir)
if (not (tf.gfile.IsDirectory(utility.FLAGS.output_dir))):
-print "create dir: ", utility.FLAGS.output_dir
+print("create dir: ", utility.FLAGS.output_dir)
tf.gfile.MkDir(utility.FLAGS.output_dir)
if (not (tf.gfile.IsDirectory(model_dir))):
-print "create dir: ", model_dir
+print("create dir: ", model_dir)
tf.gfile.MkDir(model_dir)
Train(graph, utility, batch_size, train_data, sess, model_dir,
saver)
@@ -225,10 +227,10 @@ def main(args):
train_data = data_utils.complete_wiki_processing(train_data, utility, True)
dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
test_data = data_utils.complete_wiki_processing(test_data, utility, False)
-print "# train examples ", len(train_data)
+print("# train examples ", len(train_data))
-print "# dev examples ", len(dev_data)
+print("# dev examples ", len(dev_data))
-print "# test examples ", len(test_data)
+print("# test examples ", len(test_data))
-print "running open source"
+print("running open source")
#construct TF graph and train or evaluate
master(train_data, dev_data, utility)
...
@@ -59,7 +59,7 @@ class Parameters:
#Biases for the gates and cell
for bias in ["i", "f", "c", "o"]:
if (bias == "f"):
-print "forget gate bias"
+print("forget gate bias")
params[key + "_" + bias] = tf.Variable(
tf.random_uniform([embedding_dims], 1.0, 1.1, self.utility.
tf_data_type[self.utility.FLAGS.data_type]))
...
@@ -22,6 +22,8 @@ columns.
lookup answer (or matrix) is also split into number and word lookup matrix
Author: aneelakantan (Arvind Neelakantan)
"""
+from __future__ import print_function
import math
import os
import re
@@ -56,7 +58,7 @@ def correct_unicode(string):
#string = re.sub("[“”«»]", "\"", string)
#string = re.sub("[•†‡]", "", string)
#string = re.sub("[‐‑–—]", "-", string)
-string = re.sub(ur'[\u2E00-\uFFFF]', "", string)
+string = re.sub(r'[\u2E00-\uFFFF]', "", string)
string = re.sub("\\s+", " ", string).strip()
return string
@@ -78,7 +80,7 @@ def full_normalize(string):
# Remove trailing info in brackets
string = re.sub("\[[^\]]*\]", "", string)
# Remove most unicode characters in other languages
-string = re.sub(ur'[\u007F-\uFFFF]', "", string.strip())
+string = re.sub(r'[\u007F-\uFFFF]', "", string.strip())
# Remove trailing info in parenthesis
string = re.sub("\([^)]*\)$", "", string.strip())
string = final_normalize(string)
@@ -207,7 +209,7 @@ class WikiQuestionGenerator(object):
self.dev_loader = WikiQuestionLoader(dev_name, root_folder)
self.test_loader = WikiQuestionLoader(test_name, root_folder)
self.bad_examples = 0
self.root_folder = root_folder
self.data_folder = os.path.join(self.root_folder, "annotated/data")
self.annotated_examples = {}
self.annotated_tables = {}
@@ -298,7 +300,7 @@ class WikiQuestionGenerator(object):
question_id, question, target_canon, context)
self.annotated_tables[context] = []
counter += 1
-print "Annotated examples loaded ", len(self.annotated_examples)
+print("Annotated examples loaded ", len(self.annotated_examples))
f.close()
def is_number_column(self, a):
...
@@ -20,6 +20,7 @@ import sys
import time
import numpy as np
+from six.moves import xrange
import tensorflow as tf
import model as cross_conv_model
...
@@ -18,6 +18,7 @@ import random
import sys
import numpy as np
+from six.moves import xrange
import tensorflow as tf
...
@@ -20,6 +20,7 @@ https://arxiv.org/pdf/1607.02586v1.pdf
import math
import sys
+from six.moves import xrange
import tensorflow as tf
slim = tf.contrib.slim
...
@@ -15,6 +15,7 @@
"""Read image sequence."""
+from six.moves import xrange
import tensorflow as tf
...
@@ -21,6 +21,7 @@ import sys
import numpy as np
import scipy.misc
+from six.moves import xrange
import tensorflow as tf
...
@@ -96,7 +96,7 @@ def main(_):
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
image_annotations, label_map, encoded_image)
if tf_example:
-shard_idx = long(image_id, 16) % FLAGS.num_shards
+shard_idx = int(image_id, 16) % FLAGS.num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
...
@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from six.moves import xrange
import tensorflow as tf
from object_detection.core import standard_fields
...
@@ -69,7 +69,7 @@ def freeze_graph_with_def_protos(
if optimize_graph:
logging.info('Graph Rewriter optimizations enabled')
rewrite_options = rewriter_config_pb2.RewriterConfig(
-optimize_tensor_layout=True)
+layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
rewrite_options.optimizers.append('pruning')
rewrite_options.optimizers.append('constfold')
rewrite_options.optimizers.append('layout')
...
@@ -21,6 +21,7 @@ Example box operations that are supported:
"""
import numpy as np
+from six.moves import xrange
from object_detection.utils import np_box_list
from object_detection.utils import np_box_ops
...
@@ -203,9 +203,9 @@ def padded_one_hot_encoding(indices, depth, left_pad):
TODO: add runtime checks for depth and indices.
"""
-if depth < 0 or not isinstance(depth, (int, long) if six.PY2 else int):
+if depth < 0 or not isinstance(depth, six.integer_types):
raise ValueError('`depth` must be a non-negative integer.')
-if left_pad < 0 or not isinstance(left_pad, (int, long) if six.PY2 else int):
+if left_pad < 0 or not isinstance(left_pad, six.integer_types):
raise ValueError('`left_pad` must be a non-negative integer.')
if depth == 0:
return None
...
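The change above swaps the Python 2-only `(int, long)` tuple for `six.integer_types`, which is `(int, long)` under Python 2 and `(int,)` under Python 3, so a single isinstance check works on both interpreters. A minimal sketch of that check in isolation (the `_check_non_negative_int` helper name is ours, for illustration only):

```python
import six

def _check_non_negative_int(value, name):
  # six.integer_types is (int, long) on Python 2 and (int,) on Python 3,
  # so one isinstance check covers both interpreters.
  if value < 0 or not isinstance(value, six.integer_types):
    raise ValueError('`%s` must be a non-negative integer.' % name)

_check_non_negative_int(4, 'depth')     # passes silently
# _check_non_negative_int(-1, 'depth')  # would raise ValueError
```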
@@ -145,7 +145,7 @@ class VisualizationUtilsTest(tf.test.TestCase):
for i in range(images_with_boxes_np.shape[0]):
img_name = 'image_' + str(i) + '.png'
output_file = os.path.join(self.get_temp_dir(), img_name)
-print 'Writing output image %d to %s' % (i, output_file)
+print('Writing output image %d to %s' % (i, output_file))
image_pil = Image.fromarray(images_with_boxes_np[i, ...])
image_pil.save(output_file)
...
@@ -67,20 +67,27 @@ python trainer.py --logtostderr --batch_size=25 --env=HalfCheetah-v1 \
--max_divergence=0.05 --value_opt=best_fit --critic_weight=0.0 \
```
-Run Mujoco task with Trust-PCL:
+To run Mujoco task using Trust-PCL (off-policy) use the below command.
+It should work well across all environments, given that you
+search sufficiently among
+(1) max_divergence (0.001, 0.0005, 0.002 are good values),
+(2) rollout (1, 5, 10 are good values),
+(3) tf_seed (need to average over enough random seeds).
```
python trainer.py --logtostderr --batch_size=1 --env=HalfCheetah-v1 \
-  --validation_frequency=50 --rollout=10 --critic_weight=0.0 \
-  --gamma=0.995 --clip_norm=40 --learning_rate=0.002 \
-  --replay_buffer_freq=1 --replay_buffer_size=20000 \
-  --replay_buffer_alpha=0.1 --norecurrent --objective=pcl \
-  --max_step=100 --tau=0.0 --eviction=fifo --max_divergence=0.001 \
-  --internal_dim=64 --cutoff_agent=1000 \
-  --replay_batch_size=25 --nouse_online_batch --batch_by_steps \
-  --sample_from=target --value_opt=grad --value_hidden_layers=2 \
-  --update_eps_lambda --unify_episodes --clip_adv=1.0 \
-  --target_network_lag=0.99 --prioritize_by=step
+  --validation_frequency=250 --rollout=1 --critic_weight=1.0 --gamma=0.995 \
+  --clip_norm=40 --learning_rate=0.0001 --replay_buffer_freq=1 \
+  --replay_buffer_size=5000 --replay_buffer_alpha=0.001 --norecurrent \
+  --objective=pcl --max_step=10 --cutoff_agent=1000 --tau=0.0 --eviction=fifo \
+  --max_divergence=0.001 --internal_dim=256 --replay_batch_size=64 \
+  --nouse_online_batch --batch_by_steps --value_hidden_layers=2 \
+  --update_eps_lambda --nounify_episodes --target_network_lag=0.99 \
+  --sample_from=online --clip_adv=1 --prioritize_by=step --num_steps=1000000 \
+  --noinput_prev_actions --use_target_values --tf_seed=57
```
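The new README text recommends searching over max_divergence, rollout, and tf_seed. A minimal sweep sketch under those suggestions (the wrapper script, the Python/subprocess approach, and the reduced flag set are our illustration, not part of the repository):

```python
import itertools
import subprocess

# Search ranges suggested in the README text above.
MAX_DIVERGENCES = [0.0005, 0.001, 0.002]
ROLLOUTS = [1, 5, 10]
TF_SEEDS = [0, 1, 2, 3, 4]  # average results over several seeds

for md, ro, seed in itertools.product(MAX_DIVERGENCES, ROLLOUTS, TF_SEEDS):
  # Hypothetical wrapper around the trainer.py command shown above; only the
  # three searched flags vary here, and the remaining flags from the full
  # command are omitted for brevity.
  subprocess.check_call([
      'python', 'trainer.py', '--logtostderr', '--batch_size=1',
      '--env=HalfCheetah-v1', '--objective=pcl',
      '--max_divergence=%g' % md,
      '--rollout=%d' % ro,
      '--tf_seed=%d' % seed,
  ])
```

Averaging the resulting returns over the seeds for each (max_divergence, rollout) pair, as the text recommends, is what makes the comparison across settings meaningful.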
Run Mujoco task with PCL constraint trust region:
...
@@ -20,6 +20,7 @@ In some cases this is just an additional linear layer on the policy.
In other cases, it is a completely separate neural network.
"""
+from six.moves import xrange
import tensorflow as tf
import numpy as np
...
@@ -20,6 +20,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from six.moves import xrange
import tensorflow as tf
import numpy as np
import pickle
@@ -109,13 +110,14 @@ class Controller(object):
self.episode_running_rewards = np.zeros(len(self.env))
self.episode_running_lengths = np.zeros(len(self.env))
self.episode_rewards = []
+self.greedy_episode_rewards = []
self.episode_lengths = []
self.total_rewards = []
self.best_batch_rewards = None
-def setup(self):
+def setup(self, train=True):
-self.model.setup()
+self.model.setup(train=train)
def initial_internal_state(self):
return np.zeros(self.model.policy.rnn_state_dim)
@@ -187,7 +189,7 @@ class Controller(object):
return initial_state, all_obs, all_act, rewards, all_pad
-def sample_episodes(self, sess):
+def sample_episodes(self, sess, greedy=False):
"""Sample steps from the environment until we have enough for a batch."""
# check if last batch ended with episode that was not terminated
@@ -200,7 +202,7 @@ class Controller(object):
while total_steps < self.max_step * len(self.env):
(initial_state,
observations, actions, rewards,
-pads) = self._sample_episodes(sess)
+pads) = self._sample_episodes(sess, greedy=greedy)
observations = zip(*observations)
actions = zip(*actions)
@@ -249,19 +251,26 @@ class Controller(object):
observations, initial_state, actions,
rewards, terminated, pads):
"""Train model using batch."""
+avg_episode_reward = np.mean(self.episode_rewards)
+greedy_episode_reward = (np.mean(self.greedy_episode_rewards)
+if self.greedy_episode_rewards else
+avg_episode_reward)
+loss, summary = None, None
if self.use_trust_region:
# use trust region to optimize policy
loss, _, summary = self.model.trust_region_step(
sess,
observations, initial_state, actions,
rewards, terminated, pads,
-avg_episode_reward=np.mean(self.episode_rewards))
+avg_episode_reward=avg_episode_reward,
+greedy_episode_reward=greedy_episode_reward)
else: # otherwise use simple gradient descent on policy
loss, _, summary = self.model.train_step(
sess,
observations, initial_state, actions,
rewards, terminated, pads,
-avg_episode_reward=np.mean(self.episode_rewards))
+avg_episode_reward=avg_episode_reward,
+greedy_episode_reward=greedy_episode_reward)
if self.use_value_opt: # optionally perform specific value optimization
self.model.fit_values(
@@ -305,7 +314,8 @@ class Controller(object):
if self.update_eps_lambda:
episode_rewards = np.array(self.episode_rewards)
episode_lengths = np.array(self.episode_lengths)
-eps_lambda = find_best_eps_lambda(episode_rewards, episode_lengths)
+eps_lambda = find_best_eps_lambda(
+episode_rewards[-20:], episode_lengths[-20:])
sess.run(self.model.objective.assign_eps_lambda,
feed_dict={self.model.objective.new_eps_lambda: eps_lambda})
@@ -328,10 +338,10 @@ class Controller(object):
"""Use greedy sampling."""
(initial_state,
observations, actions, rewards,
-pads) = self._sample_episodes(sess, greedy=True)
+pads, terminated) = self.sample_episodes(sess, greedy=True)
total_rewards = np.sum(np.array(rewards) * (1 - np.array(pads)), axis=0)
-return np.mean(total_rewards)
+return total_rewards, self.episode_rewards
def convert_from_batched_episodes(
self, initial_state, observations, actions, rewards,
@@ -351,7 +361,7 @@ class Controller(object):
for i in xrange(num_episodes):
length = total_length[i]
ep_initial = initial_state[i]
-ep_obs = [obs[:length, i, ...] for obs in observations]
+ep_obs = [obs[:length + 1, i, ...] for obs in observations]
ep_act = [act[:length + 1, i, ...] for act in actions]
ep_rewards = rewards[:length, i]
...
@@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
import numpy as np
+from six.moves import xrange
class spaces(object):
...
@@ -22,6 +22,7 @@ import tensorflow as tf
import random
import os
import numpy as np
+from six.moves import xrange
import pickle
gfile = tf.gfile
...