chenpangpang / transformers · Commits

Unverified commit c60e0e1e, authored Jan 15, 2021 by Stas Bekman, committed by GitHub on Jan 15, 2021

deepspeed + grad acumm (#9622)
Parent: 6d3b688b

Showing 2 changed files with 8 additions and 1 deletion (+8 -1):

    examples/seq2seq/test_finetune_trainer.py    +5 -0
    src/transformers/trainer.py                  +3 -1
examples/seq2seq/test_finetune_trainer.py (view file @ c60e0e1e)

@@ -112,6 +112,11 @@ class TestFinetuneTrainer(TestCasePlus):
     def test_finetune_trainer_deepspeed(self):
         self.finetune_trainer_quick(deepspeed=True)
 
+    @require_torch_multi_gpu
+    @require_deepspeed
+    def test_finetune_trainer_deepspeed_grad_acum(self):
+        self.finetune_trainer_quick(deepspeed=True, extra_args_str="--gradient_accumulation_steps 2")
+
     @slow
     def test_finetune_trainer_slow(self):
         # There is a missing call to __init__process_group somewhere
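For context: @require_torch_multi_gpu and @require_deepspeed are test-skipping decorators, so the new test only runs on a machine with at least two GPUs and a working DeepSpeed install. A minimal sketch of what a decorator like require_deepspeed typically reduces to (an illustration under that assumption, not the transformers implementation):

import importlib.util
import unittest

def require_deepspeed(test_case):
    # Skip the decorated test unless the deepspeed package is importable.
    if importlib.util.find_spec("deepspeed") is None:
        return unittest.skip("test requires deepspeed")(test_case)
    return test_case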
src/transformers/trainer.py (view file @ c60e0e1e)

@@ -931,7 +931,9 @@ class Trainer:
                 )
 
                 # Optimizer step
-                if is_torch_tpu_available():
+                if self.deepspeed:
+                    self.deepspeed.step()
+                elif is_torch_tpu_available():
                     xm.optimizer_step(self.optimizer)
                 elif self.use_amp:
                     self.scaler.step(self.optimizer)
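The change above makes the Trainer delegate the optimizer step to the DeepSpeed engine: the engine owns the optimizer and tracks accumulation boundaries internally, so calling self.optimizer.step() directly would bypass that bookkeeping. A minimal standalone sketch of the underlying DeepSpeed pattern (assuming a recent deepspeed install and an environment where it can initialize; the model, data, and config values here are placeholders, not part of the commit):

import deepspeed
import torch

model = torch.nn.Linear(10, 2)
ds_config = {
    "train_batch_size": 8,  # = micro batch (4) * grad accum (2) * world size (1)
    "gradient_accumulation_steps": 2,
    "optimizer": {"type": "Adam", "params": {"lr": 1e-3}},
}
engine, _, _, _ = deepspeed.initialize(
    model=model, model_parameters=model.parameters(), config=ds_config
)

for x in [torch.randn(4, 10) for _ in range(4)]:  # four micro-batches
    loss = engine(x.to(engine.device)).sum()
    engine.backward(loss)  # engine scales the loss for accumulation
    engine.step()          # only steps the optimizer every 2nd micro-batch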