Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
5b74f764
Commit
5b74f764
authored
Jan 11, 2021
by
mohammad
Browse files
fixed validation loss reporting in tensorboard
parent
f5eac3d1
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
3 deletions
+9
-3
megatron/training.py
megatron/training.py
+9
-3
No files found.
megatron/training.py
View file @
5b74f764
...
@@ -924,6 +924,7 @@ def evaluate_and_print_results(prefix, forward_step_func,
...
@@ -924,6 +924,7 @@ def evaluate_and_print_results(prefix, forward_step_func,
data_iterator
,
model
,
data_iterator
,
model
,
iteration
,
verbose
=
False
):
iteration
,
verbose
=
False
):
"""Helper function to evaluate and dump results on screen."""
"""Helper function to evaluate and dump results on screen."""
args
=
get_args
()
writer
=
get_tensorboard_writer
()
writer
=
get_tensorboard_writer
()
total_loss_dict
=
evaluate
(
forward_step_func
,
data_iterator
,
model
,
verbose
)
total_loss_dict
=
evaluate
(
forward_step_func
,
data_iterator
,
model
,
verbose
)
...
@@ -932,11 +933,16 @@ def evaluate_and_print_results(prefix, forward_step_func,
...
@@ -932,11 +933,16 @@ def evaluate_and_print_results(prefix, forward_step_func,
string
+=
'{} value: {:.6E} | '
.
format
(
key
,
total_loss_dict
[
key
].
item
())
string
+=
'{} value: {:.6E} | '
.
format
(
key
,
total_loss_dict
[
key
].
item
())
ppl
=
math
.
exp
(
min
(
20
,
total_loss_dict
[
key
].
item
()))
ppl
=
math
.
exp
(
min
(
20
,
total_loss_dict
[
key
].
item
()))
string
+=
'{} PPL: {:.6E} | '
.
format
(
key
,
ppl
)
string
+=
'{} PPL: {:.6E} | '
.
format
(
key
,
ppl
)
if
writer
and
torch
.
distributed
.
ge
t_rank
()
==
0
:
if
writer
and
is_las
t_rank
():
writer
.
add_scalar
(
'{} value'
.
format
(
key
),
writer
.
add_scalar
(
'{} value
-validation
'
.
format
(
key
),
total_loss_dict
[
key
].
item
(),
total_loss_dict
[
key
].
item
(),
iteration
)
iteration
)
writer
.
add_scalar
(
'{} ppl'
.
format
(
key
),
ppl
,
iteration
)
writer
.
add_scalar
(
'{} ppl-validation'
.
format
(
key
),
ppl
,
iteration
)
writer
.
add_scalar
(
'{} value-validation vs samples'
.
format
(
key
),
total_loss_dict
[
key
].
item
(),
args
.
consumed_train_samples
)
writer
.
add_scalar
(
'{} ppl-validation vs samples'
.
format
(
key
),
ppl
,
args
.
consumed_train_samples
)
length
=
len
(
string
)
+
1
length
=
len
(
string
)
+
1
print_rank_last
(
'-'
*
length
)
print_rank_last
(
'-'
*
length
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment