Commit 91ff146d authored by Lukasz Kaiser

Move to batch-size 32 to fit into 4GB GPUs, start adapting hyperparameters.

parent 47ab157a
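The batch-size change is the headline here: activation memory in this model scales roughly linearly with batch size, so dropping from 64 to 32 roughly halves the activation footprint, which is what lets training fit on 4GB cards. A back-of-envelope sketch of that reasoning (the tensor shape below is an illustrative assumption built from the nmaps flag in this diff, not a measured profile):

def rough_activation_bytes(batch_size, length=41, width=4, nmaps=24,
                           bytes_per_float=4):
    """Rough linear model: one [batch, length, width, nmaps] float32 state."""
    return batch_size * length * width * nmaps * bytes_per_float

# Halving the batch halves the estimate; real usage also includes weights,
# gradients, and unrolled steps, but those scale the same way or are fixed.
print(rough_activation_bytes(64) / rough_activation_bytes(32))  # -> 2.0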
@@ -151,7 +151,7 @@ class NeuralGPU(object):
         tf.constant(0, dtype=tf.int32, shape=[1]),
         tf.zeros([1, vec_size]))
-    adam = tf.train.AdamOptimizer(0.01*self.lr, epsilon=1e-5)
+    adam = tf.train.AdamOptimizer(0.01*self.lr, epsilon=1e-4)
     # Main graph creation loop, for every bin in data_utils.
     self.steps = []
...
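The epsilon bump (1e-5 to 1e-4) is subtle but meaningful: epsilon sits in the denominator of Adam's update, so a larger value damps the effective step for parameters whose second-moment estimate is near zero, which tends to stabilize early training. A minimal sketch of the standard Adam update for reference (plain NumPy, not this repository's code):

import numpy as np

def adam_step(theta, grad, m, v, t, lr=0.01, beta1=0.9, beta2=0.999, eps=1e-4):
    """One standard Adam update; eps keeps the denominator away from zero."""
    m = beta1 * m + (1 - beta1) * grad           # first-moment (mean) estimate
    v = beta2 * v + (1 - beta2) * grad * grad    # second-moment estimate
    m_hat = m / (1 - beta1 ** t)                 # bias correction
    v_hat = v / (1 - beta2 ** t)
    theta = theta - lr * m_hat / (np.sqrt(v_hat) + eps)
    return theta, m, v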
@@ -31,16 +31,16 @@ from tensorflow.python.platform import gfile
 import data_utils as data
 import neural_gpu
 
-tf.app.flags.DEFINE_float("lr", 0.1, "Learning rate.")
+tf.app.flags.DEFINE_float("lr", 0.3, "Learning rate.")
 tf.app.flags.DEFINE_float("init_weight", 1.0, "Initial weights deviation.")
 tf.app.flags.DEFINE_float("max_grad_norm", 0.05, "Clip gradients to this norm.")
 tf.app.flags.DEFINE_float("cutoff", 1.2, "Cutoff at the gates.")
 tf.app.flags.DEFINE_float("pull", 0.0005, "Starting pull of the relaxations.")
 tf.app.flags.DEFINE_float("pull_incr", 1.2, "Increase pull by that much.")
-tf.app.flags.DEFINE_float("curriculum_bound", 0.06, "Move curriculum < this.")
+tf.app.flags.DEFINE_float("curriculum_bound", 0.08, "Move curriculum < this.")
 tf.app.flags.DEFINE_float("dropout", 0.15, "Dropout that much.")
 tf.app.flags.DEFINE_float("grad_noise_scale", 1.0, "Gradient noise scale.")
-tf.app.flags.DEFINE_integer("batch_size", 64, "Batch size.")
+tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size.")
 tf.app.flags.DEFINE_integer("low_batch_size", 16, "Low batch size.")
 tf.app.flags.DEFINE_integer("steps_per_checkpoint", 200, "Steps per epoch.")
 tf.app.flags.DEFINE_integer("nmaps", 24, "Number of floats in each cell.")
@@ -256,7 +256,7 @@ def train():
       if max_cur_length < max_length:
         prev_acc_perp.append(1000000)
       # Either increase pull or, if it's large, average parameters.
-      if pull < 1:
+      if pull < 0.1:
         sess.run(model.pull_incr_op)
       else:
         data.print_out("  Averaging parameters.")
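Lowering the pull threshold from 1 to 0.1 makes parameter averaging kick in much earlier in training. Assuming pull_incr_op multiplies pull by the pull_incr flag (1.2), as its name suggests, the threshold is now crossed after 30 increments instead of 42:

import math

def increments_to_reach(threshold, start=0.0005, factor=1.2):
    """Multiplicative steps until start * factor**n first reaches threshold."""
    return math.ceil(math.log(threshold / start) / math.log(factor))

print(increments_to_reach(0.1))  # new threshold: 30
print(increments_to_reach(1.0))  # old threshold: 42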
@@ -283,7 +283,7 @@ def train():
             l += 1
           while l < bound + 1 and not data.test_set[t][l]:
             l += 1
-          if seq_err < 0.5:  # Run larger test if we're good enough.
+          if seq_err < 0.05:  # Run larger test if we're good enough.
             _, seq_err = multi_test(data.forward_max, model, sess, t,
                                     FLAGS.nprint, batch_size * 4)
           if seq_err < 0.01:  # Super-large test on 1-task large-forward models.
...
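The evaluation gate tightens by an order of magnitude: the larger forward_max test now runs only once sequence error drops below 5% (previously 50%), and only its result can unlock the super-large test at 1%. A minimal sketch of the cascade (run_test stands in for the repository's multi_test call; this paraphrases the control flow rather than quoting it):

def maybe_escalate_tests(seq_err, run_test):
    """Escalate to more expensive evaluations only as accuracy improves."""
    if seq_err < 0.05:                    # good enough: test at forward_max
        seq_err = run_test("large")
        if seq_err < 0.01:                # near-perfect: super-large test
            run_test("super-large")
    return seq_err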