Commit 85e10a2c authored by Raymond Yuan

updated default lr and hyperparams

parent 016ddfc6
 import os
-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
+# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 import threading
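(A general CUDA note, not part of the commit: setting CUDA_VISIBLE_DEVICES to the empty string hides every GPU from TensorFlow and forces the workers onto the CPU; CUDA_DEVICE_ORDER="PCI_BUS_ID" only controls how device indices map to physical GPUs, matching nvidia-smi's ordering, so commenting it out is harmless on a CPU-only run.)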
@@ -23,7 +23,7 @@ parser.add_argument('--algorithm', default='a3c', type=str,
                     help='Choose between \'a3c\' and \'random\'.')
 parser.add_argument('--train', dest='train', action='store_true',
                     help='Train our model.')
-parser.add_argument('--lr', default=0.0005,
+parser.add_argument('--lr', default=0.001,
                     help='Learning rate for the shared optimizer.')
 parser.add_argument('--update-freq', default=20, type=int,
                     help='How often to update the global model.')
@@ -350,12 +350,12 @@ class Worker(threading.Thread):
         actions_one_hot = tf.one_hot(memory.actions, self.action_size, dtype=tf.float32)

         policy = tf.nn.softmax(logits)
-        entropy = tf.reduce_sum(policy * tf.log(policy + 1e-10), axis=1)
+        entropy = tf.reduce_sum(policy * tf.log(policy + 1e-20), axis=1)

         policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=actions_one_hot,
                                                                  logits=logits)
         policy_loss *= tf.stop_gradient(advantage)
-        policy_loss += 0.01 * entropy
+        policy_loss -= 0.01 * entropy
         total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss))
         return total_loss
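A small numeric check of the sign convention this hunk flips (illustrative values, not from the repo): the entropy tensor above holds sum(p * log p), the negative of the Shannon entropy H(p) = -sum(p * log p), so the old += 0.01 * entropy subtracts 0.01 * H(p) from the minimized loss (the usual A3C exploration bonus), while the new -= 0.01 * entropy adds 0.01 * H(p):

import numpy as np

policy = np.array([0.7, 0.2, 0.1])               # hypothetical 3-action policy

neg_H = np.sum(policy * np.log(policy + 1e-20))  # sum(p log p) ~= -0.802
H = -neg_H                                       # Shannon entropy ~= +0.802

base_loss = 1.0  # stand-in for the advantage-weighted cross-entropy term

loss_old = base_loss + 0.01 * neg_H  # old `+=`: base - 0.01 * H ~= 0.992
loss_new = base_loss - 0.01 * neg_H  # new `-=`: base + 0.01 * H ~= 1.008
print(loss_old, loss_new)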