ModelZoo / ResNet50_tensorflow

Commit 85e10a2c
authored Jul 23, 2018 by Raymond Yuan
parent 016ddfc6

updated default lr and hyperparams
Showing 1 changed file with 4 additions and 4 deletions.
research/a3c_blogpost/a3c_cartpole.py (+4, -4), view file @ 85e10a2c
 import os
-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
+# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 import threading
...
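Note: with CUDA_VISIBLE_DEVICES set to an empty string before TensorFlow is imported, CUDA sees no GPUs and the script runs entirely on CPU, whether or not the CUDA_DEVICE_ORDER line above is commented out. A minimal sketch of how to confirm that (illustration only, assuming a TF 1.x environment like the one this script targets; not part of the commit):

    import os

    # Must be set before the first TensorFlow import, otherwise the GPUs
    # may already have been registered with the CUDA runtime.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    import tensorflow as tf
    from tensorflow.python.client import device_lib

    # Only CPU devices should be listed, and no GPU should be available.
    print([d.name for d in device_lib.list_local_devices()])
    print(tf.test.is_gpu_available())  # expected: False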
@@ -23,7 +23,7 @@ parser.add_argument('--algorithm', default='a3c', type=str,
                     help='Choose between \'a3c\' and \'random\'.')
 parser.add_argument('--train', dest='train', action='store_true',
                     help='Train our model.')
-parser.add_argument('--lr', default=0.0005,
+parser.add_argument('--lr', default=0.001,
                     help='Learning rate for the shared optimizer.')
 parser.add_argument('--update-freq', default=20, type=int,
                     help='How often to update the global model.')
...
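Note: the only change in this hunk is the default learning rate for the --lr flag, 0.0005 -> 0.001. A minimal sketch of how these flags would typically feed the shared optimizer (the AdamOptimizer call is an assumption about the surrounding script, not something shown in this diff):

    import argparse
    import tensorflow as tf

    parser = argparse.ArgumentParser()
    parser.add_argument('--lr', default=0.001,
                        help='Learning rate for the shared optimizer.')
    parser.add_argument('--update-freq', default=20, type=int,
                        help='How often to update the global model.')
    args = parser.parse_args([])  # no CLI args, so the defaults apply

    # Hypothetical downstream use of the new default learning rate.
    opt = tf.train.AdamOptimizer(args.lr, use_locking=True)

Since --lr is declared without type=float, a value passed on the command line (e.g. --lr 0.0005) arrives as a string; only the default is a float.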
@@ -350,12 +350,12 @@ class Worker(threading.Thread):
     actions_one_hot = tf.one_hot(memory.actions, self.action_size, dtype=tf.float32)
     policy = tf.nn.softmax(logits)
-    entropy = tf.reduce_sum(policy * tf.log(policy + 1e-10), axis=1)
+    entropy = tf.reduce_sum(policy * tf.log(policy + 1e-20), axis=1)
     policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=actions_one_hot,
                                                              logits=logits)
     policy_loss *= tf.stop_gradient(advantage)
-    policy_loss += 0.01 * entropy
+    policy_loss -= 0.01 * entropy
     total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss))
     return total_loss
...
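Note: the last hunk adjusts the entropy regularizer inside the Worker loss in two ways: the epsilon that keeps tf.log away from log(0) shrinks from 1e-10 to 1e-20, and the 0.01-weighted entropy term flips from += to -=. As written, the entropy tensor holds sum(p * log p) over the actions, i.e. the negative of the Shannon entropy, so it is always <= 0. A small NumPy sketch of the quantity being folded into the loss (illustration only, not part of the repository):

    import numpy as np

    def neg_entropy(policy, eps):
        # Mirrors tf.reduce_sum(policy * tf.log(policy + eps), axis=1):
        # sum_a p_a * log(p_a + eps), i.e. the negative Shannon entropy.
        policy = np.asarray(policy, dtype=np.float64)
        return np.sum(policy * np.log(policy + eps), axis=-1)

    uniform = [0.5, 0.5]       # maximally uncertain 2-action policy
    peaked = [0.999, 0.001]    # nearly deterministic policy

    for eps in (1e-10, 1e-20):
        print(eps, neg_entropy(uniform, eps), neg_entropy(peaked, eps))
    # uniform: about -0.693 (-log 2); peaked: about -0.008. The epsilon only
    # matters once a probability is close enough to zero that log(p) would
    # blow up, so 1e-10 -> 1e-20 mainly affects near-zero probabilities.

    # After this commit the regularizer contributes -0.01 * neg_entropy(...)
    # to policy_loss, a non-negative term that is largest for uniform policies.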