Unverified Commit 8722f59f, authored by moneypi, committed by GitHub

update a3c_cartpole.py to tf2.x (#8662)

parent ad423d06
README (prerequisites):

```diff
@@ -3,4 +3,4 @@ In order to run this code, you will need the following prerequisites:
 * [OpenAI Gym](https://github.com/openai/gym) - `pip install gym`
 * [pyglet](https://bitbucket.org/pyglet/pyglet/wiki/Home) - `pip install pyglet`
-* [TensorFlow](https://www.tensorflow.org/install/) - `pip install tensorflow==v1.14.0`
+* [TensorFlow](https://www.tensorflow.org/install/) - `pip install tensorflow==2.2.0`
```
a3c_cartpole.py:

```diff
@@ -14,8 +14,6 @@ import tensorflow as tf
 from tensorflow.python import keras
 from tensorflow.python.keras import layers
 
-tf.enable_eager_execution()
-
 parser = argparse.ArgumentParser(description='Run A3C algorithm on the game '
                                              'Cartpole.')
 parser.add_argument('--algorithm', default='a3c', type=str,
```
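The deleted call reflects that TF 2.x executes eagerly by default, so the TF 1.x opt-in `tf.enable_eager_execution()` is gone from the top-level API and can simply be removed. A minimal sketch confirming this at runtime; the assertion and sample op are illustrative, not part of the patch:

```python
import tensorflow as tf

# TF 2.x runs eagerly by default; the TF 1.x opt-in
# tf.enable_eager_execution() no longer exists at the top level.
assert tf.executing_eagerly()

# Ops execute immediately and return concrete values.
x = tf.constant([1.0, 2.0])
print(tf.reduce_sum(x).numpy())  # 3.0
```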
```diff
@@ -135,7 +133,7 @@ class MasterAgent():
     env = gym.make(self.game_name)
     self.state_size = env.observation_space.shape[0]
     self.action_size = env.action_space.n
-    self.opt = tf.train.AdamOptimizer(args.lr, use_locking=True)
+    self.opt = tf.compat.v1.train.AdamOptimizer(args.lr, use_locking=True)
     print(self.state_size, self.action_size)
     self.global_model = ActorCriticModel(self.state_size, self.action_size)  # global network
```
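`tf.train.AdamOptimizer` no longer exists in the TF 2.x namespace, so the patch reaches it through the `tf.compat.v1` shim, which keeps the v1 signature including `use_locking`. For reference, a minimal sketch of the native TF 2.x alternative; note that `tf.keras.optimizers.Adam` takes `learning_rate` and has no `use_locking` argument (the `lr` value below stands in for the script's `args.lr` flag):

```python
import tensorflow as tf

lr = 1e-3  # stands in for args.lr, the script's learning-rate flag

# What the patch uses: the TF 1.x optimizer via the compat shim,
# which preserves the v1 signature including use_locking.
opt_v1 = tf.compat.v1.train.AdamOptimizer(lr, use_locking=True)

# Native TF 2.x alternative (sketch): the Keras Adam optimizer.
# It has no use_locking parameter; gradients are still applied with
# opt.apply_gradients(zip(grads, model.trainable_variables)).
opt_v2 = tf.keras.optimizers.Adam(learning_rate=lr)
```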
```diff
@@ -348,7 +346,7 @@ class Worker(threading.Thread):
     # Calculate our policy loss
     policy = tf.nn.softmax(logits)
-    entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=policy, logits=logits)
+    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=policy, logits=logits)
     policy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=memory.actions,
                                                                  logits=logits)
```
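In TF 2.x the `_v2` suffix is dropped: `tf.nn.softmax_cross_entropy_with_logits` now has the v2 semantics directly, so only the name changes here. With `labels=policy`, the cross-entropy of the softmax distribution with itself is exactly the policy entropy H(pi) = -sum_a pi(a) log pi(a), which A3C adds to the loss as an exploration bonus. A small self-contained check of that identity (the example logits are made up):

```python
import tensorflow as tf

# Hypothetical logits for a single state with three actions.
logits = tf.constant([[2.0, 0.5, -1.0]])
policy = tf.nn.softmax(logits)

# Cross-entropy of the policy with itself equals its entropy:
# H(pi) = -sum_a pi(a) * log pi(a)
entropy = tf.nn.softmax_cross_entropy_with_logits(labels=policy, logits=logits)
manual = -tf.reduce_sum(policy * tf.math.log(policy), axis=-1)

print(entropy.numpy(), manual.numpy())  # both print the same value, ~0.62
```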