OpenDAS / Megatron-LM · Commits

Commit 3d7194c4, authored Nov 03, 2020 by Deepak Narayanan
Divide gradient by number of microbatches in minibatch
Parent: a6756bf8
Showing 3 changed files with 5 additions and 4 deletions (+5 -4):
megatron/training.py  +1 -2
pretrain_bert.py      +2 -1
pretrain_gpt2.py      +2 -1
megatron/training.py
@@ -554,8 +554,7 @@ def train_step(forward_step_func, data_iterator,
         loss_reduced = {}
         for key in losses_reduced[0]:
             losses_reduced_for_key = [x[key] for x in losses_reduced]
-            loss_reduced[key] = sum(losses_reduced_for_key) / \
-                len(losses_reduced_for_key)
+            loss_reduced[key] = sum(losses_reduced_for_key)
         return loss_reduced, skipped_iter
     return {}, skipped_iter
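Because each microbatch's forward_step now pre-divides its loss by the number of microbatches (see the pretrain_* changes below), the training loop can report the plain sum of the per-key losses instead of dividing the sum by the count. A minimal sketch with made-up loss values, mirroring the loop above:

```python
# Made-up per-microbatch losses; each forward_step has already divided
# its loss by num_microbatches, so summing yields the minibatch average.
num_microbatches = 2
raw_losses = [2.0, 4.0]
losses_reduced = [{'lm loss': l / num_microbatches} for l in raw_losses]

loss_reduced = {}
for key in losses_reduced[0]:
    losses_reduced_for_key = [x[key] for x in losses_reduced]
    loss_reduced[key] = sum(losses_reduced_for_key)

assert loss_reduced['lm loss'] == sum(raw_losses) / num_microbatches  # 3.0
```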
pretrain_bert.py
@@ -118,7 +118,8 @@ def forward_step(data_iterator, model, input_tensor):
     lm_loss_ = lm_loss_.float()
     loss_mask = loss_mask.float()
     lm_loss = torch.sum(
-        lm_loss_.view(-1) * loss_mask.reshape(-1)) / loss_mask.sum()
+        lm_loss_.view(-1) * loss_mask.reshape(-1)) / (
+            loss_mask.sum() * args.num_microbatches_in_minibatch)
     loss = lm_loss + sop_loss
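The expression above is a masked mean: only tokens with loss_mask == 1 contribute, and the result is additionally scaled by 1/num_microbatches so that gradient accumulation over the minibatch averages rather than sums. A small numerical sketch (the shapes and the microbatch count are illustrative, not from the commit):

```python
import torch

lm_loss_ = torch.tensor([[1.0, 2.0], [3.0, 4.0]])   # per-token LM losses
loss_mask = torch.tensor([[1.0, 0.0], [1.0, 1.0]])  # 0 masks padding tokens
num_microbatches = 4  # stands in for args.num_microbatches_in_minibatch

# Mean over unmasked tokens: (1 + 3 + 4) / 3 = 2.6667
masked_mean = torch.sum(
    lm_loss_.view(-1) * loss_mask.reshape(-1)) / loss_mask.sum()
# Pre-scaled for gradient accumulation: 2.6667 / 4 = 0.6667
scaled = torch.sum(lm_loss_.view(-1) * loss_mask.reshape(-1)) / (
    loss_mask.sum() * num_microbatches)
print(masked_mean.item(), scaled.item())
```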
pretrain_gpt2.py
@@ -110,7 +110,8 @@ def forward_step(data_iterator, model, input_tensor):
     if mpu.is_pipeline_last_stage():
         losses = output_tensor.float()
         loss_mask = loss_mask.view(-1).float()
-        loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum()
+        loss = torch.sum(losses.view(-1) * loss_mask) / (
+            loss_mask.sum() * args.num_microbatches_in_minibatch)
         # Reduce loss for logging.
         averaged_loss = average_losses_across_data_parallel_group([loss])
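Dividing each microbatch loss by the microbatch count is what makes gradient accumulation equivalent to a single pass over the whole minibatch: autograd scales every gradient by the same factor as the loss, so the accumulated backward passes sum to the minibatch-mean gradient. A self-contained check of that equivalence on a toy model (not Megatron code):

```python
import torch

torch.manual_seed(0)
model = torch.nn.Linear(4, 1)
data, target = torch.randn(8, 4), torch.randn(8, 1)
loss_fn = torch.nn.MSELoss()

# Reference: one backward pass over the full minibatch.
model.zero_grad()
loss_fn(model(data), target).backward()
reference_grad = model.weight.grad.clone()

# Accumulation: 4 microbatches, each loss pre-divided by 4.
num_microbatches = 4
model.zero_grad()
for x, y in zip(data.chunk(num_microbatches), target.chunk(num_microbatches)):
    (loss_fn(model(x), y) / num_microbatches).backward()

print(torch.allclose(model.weight.grad, reference_grad, atol=1e-6))  # True
```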