ModelZoo / ResNet50_tensorflow · Commits · 016ddfc6
"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "1f6986e91902b1308a8dc6be45418b7db9ccc0e9"
Commit 016ddfc6, authored Jul 23, 2018 by Raymond Yuan

updated loss fn (added entropy and fixed advantage bug)

Parent: 22a669d6
Showing 1 changed file with 8 additions and 3 deletions

research/a3c_blogpost/a3c_cartpole.py  (+8, −3)
@@ -27,7 +27,7 @@ parser.add_argument('--lr', default=0.0005,
                     help='Learning rate for the shared optimizer.')
 parser.add_argument('--update-freq', default=20, type=int,
                     help='How often to update the global model.')
-parser.add_argument('--max-eps', default=2000, type=int,
+parser.add_argument('--max-eps', default=1000, type=int,
                     help='Global maximum number of episodes to run.')
 parser.add_argument('--gamma', default=0.99,
                     help='Discount factor of rewards.')
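As a quick reference for what the changed default means, here is a small self-contained sketch of a parser built from the flags in this hunk. Only the add_argument lines come from the diff; the ArgumentParser construction and everything else in the script are assumptions for illustration.

import argparse

parser = argparse.ArgumentParser()  # the script's real description is not shown in this hunk
parser.add_argument('--lr', default=0.0005,
                    help='Learning rate for the shared optimizer.')
parser.add_argument('--update-freq', default=20, type=int,
                    help='How often to update the global model.')
parser.add_argument('--max-eps', default=1000, type=int,
                    help='Global maximum number of episodes to run.')
parser.add_argument('--gamma', default=0.99,
                    help='Discount factor of rewards.')

args = parser.parse_args([])   # no command-line flags given, so the defaults apply
print(args.max_eps)            # 1000 after this commit
# To override at the command line instead: python a3c_cartpole.py --max-eps 5000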
@@ -349,9 +349,14 @@ class Worker(threading.Thread):
     # Calculate our policy loss
     actions_one_hot = tf.one_hot(memory.actions, self.action_size, dtype=tf.float32)

-    policy_loss = -tf.nn.softmax_cross_entropy_with_logits_v2(labels=actions_one_hot,
-                                                               logits=logits)
-    total_loss = tf.reduce_mean((value_loss + policy_loss))
+    policy = tf.nn.softmax(logits)
+    entropy = tf.reduce_sum(policy * tf.log(policy + 1e-10), axis=1)
+
+    policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=actions_one_hot,
+                                                             logits=logits)
+    policy_loss *= tf.stop_gradient(advantage)
+    policy_loss += 0.01 * entropy
+    total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss))
     return total_loss
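To make the updated loss easier to follow, below is a minimal, self-contained sketch of just these terms. It is not the repository's code: the logits, actions, and advantage values are invented for illustration, and it assumes TensorFlow 1.x with eager execution enabled (where tf.log and tf.nn.softmax_cross_entropy_with_logits_v2 are available).

import numpy as np
import tensorflow as tf

tf.enable_eager_execution()  # TF 1.x API; the file dates from mid-2018

action_size = 2
logits = tf.constant([[0.2, -0.4], [1.1, 0.3]], dtype=tf.float32)  # fake policy logits
actions = np.array([0, 1])                                         # fake chosen actions
advantage = tf.constant([0.5, -0.2], dtype=tf.float32)             # fake advantages
value_loss = advantage ** 2

actions_one_hot = tf.one_hot(actions, action_size, dtype=tf.float32)

# Entropy term: sum_a pi(a|s) * log pi(a|s) is the negative entropy, so adding it
# (scaled by 0.01) to the loss rewards higher-entropy, more exploratory policies.
policy = tf.nn.softmax(logits)
entropy = tf.reduce_sum(policy * tf.log(policy + 1e-10), axis=1)

# Cross-entropy of the taken actions, weighted by the advantage; stop_gradient
# keeps the policy loss from backpropagating into the advantage (value) estimate.
policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=actions_one_hot,
                                                          logits=logits)
policy_loss *= tf.stop_gradient(advantage)
policy_loss += 0.01 * entropy

total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss))
print(total_loss.numpy())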