chenpangpang / transformers / Commits / 6ad9c8f7

Unverified commit 6ad9c8f7, authored Apr 24, 2024 by Zach Mueller, committed via GitHub on Apr 24, 2024.

Non blocking support to torch DL's (#30465)

* Non blocking support
* Check for optimization
* Doc

Parent: 5c57463b
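
Based on the commit message and the `AcceleratorConfig` changes below, here is a minimal sketch of how a user might opt into this feature from the `Trainer` API. The `output_dir` value is illustrative, and passing a plain dict to `TrainingArguments(accelerator_config=...)` is assumed to work as it did when `AcceleratorConfig` was introduced; this sketch is not part of the commit itself.

# Hypothetical usage sketch, not part of this commit.
# Assumes transformers with this change plus accelerate >= 0.30.0, and that
# TrainingArguments.accelerator_config accepts a plain dict of AcceleratorConfig fields.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="out",                           # illustrative path
    dataloader_pin_memory=True,                 # recommended alongside non_blocking (see warning in trainer.py below)
    accelerator_config={"non_blocking": True},  # the new flag added by this commit
)
# trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
# trainer.train()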
Changes: 2 changed files with 27 additions and 0 deletions

* src/transformers/trainer.py (+12, -0)
* src/transformers/trainer_pt_utils.py (+15, -0)
src/transformers/trainer.py

@@ -4361,6 +4361,18 @@ class Trainer:
                even_batches=accelerator_config.pop("even_batches"),
                use_seedable_sampler=accelerator_config.pop("use_seedable_sampler"),
            )
            non_blocking = accelerator_config.pop("non_blocking")
            if not is_accelerate_available("0.30.0"):
                if non_blocking:
                    raise ImportError(
                        "`non_blocking` is only supported in accelerate v0.30.0 and above. Please upgrade accelerate to use this feature."
                    )
            else:
                if non_blocking and not self.args.dataloader_pin_memory:
                    logger.warning(
                        "`non_blocking` is enabled but `dataloader_pin_memory` is not. For the best performance, it's recommended to enable both."
                    )
                dataloader_config.non_blocking = non_blocking
        # this would have been updated above, no need for it anymore
        accelerator_config.pop("gradient_accumulation_kwargs")
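
The `dataloader_pin_memory` warning above reflects standard CUDA behavior rather than anything specific to this patch: a host-to-device copy can only run asynchronously (and overlap with compute) when the source tensor sits in pinned, page-locked memory. A self-contained plain-PyTorch illustration of that interaction, separate from the Trainer/accelerate code path:

import torch

# Standalone sketch of the pin_memory + non_blocking interaction (plain PyTorch,
# not the Trainer code path). Shapes and values are arbitrary.
if torch.cuda.is_available():
    device = torch.device("cuda")
    batch = torch.randn(1024, 1024).pin_memory()   # page-locked host memory
    # With pinned memory this copy is asynchronous and can overlap with GPU compute.
    gpu_batch = batch.to(device, non_blocking=True)
    # From pageable (non-pinned) memory, non_blocking=True silently falls back to a blocking copy.
    loss = (gpu_batch * 2).sum()
    torch.cuda.synchronize()                       # wait for the queued work before reading the result
    print(loss.item())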
src/transformers/trainer_pt_utils.py

@@ -1246,6 +1246,10 @@ class AcceleratorConfig:
            The [`accelerate.utils.GradientAccumulationPlugin`] default is `True`.
        sync_each_batch (`bool`): Whether to synchronize the gradients at each data batch.
            The [`accelerate.utils.GradientAccumulationPlugin`] default is `False`.
        non_blocking (`bool`, *optional*, defaults to `False`):
            Whether to use non-blocking CUDA calls to help minimize synchronization during
            distributed training with prepared `DataLoader` inputs being moved to device.
            Best if used with `pin_memory=True` in the `TrainingArguments`.
    """

@@ -1284,6 +1288,17 @@ class AcceleratorConfig:
            "multiple different seeds to compare. Should also be ran with [`~utils.set_seed`] for the best results."
        },
    )
    non_blocking: Optional[bool] = field(
        default=False,
        metadata={
            "help": "Whether to use non-blocking CUDA calls to help minimize synchronization during "
            "distributed training with prepared `DataLoader` inputs being moved to device. "
            "Best if used with `pin_memory=True` in the `TrainingArguments`. Requires accelerate "
            "v0.30.0."
        },
    )
    gradient_accumulation_kwargs: Optional[Dict] = field(
        default=None,
        metadata={
            ...
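
Outside the `Trainer`, the same switch lives on accelerate's `DataLoaderConfiguration`, which is what `dataloader_config.non_blocking = non_blocking` in trainer.py ultimately feeds. A rough sketch of the equivalent direct accelerate usage, assuming the accelerate >= 0.30.0 API where `DataLoaderConfiguration` gained a `non_blocking` field and `Accelerator` accepts a `dataloader_config` argument:

# Rough equivalent when driving accelerate directly; API names assumed from accelerate >= 0.30.0.
from accelerate import Accelerator
from accelerate.utils import DataLoaderConfiguration

dataloader_config = DataLoaderConfiguration(non_blocking=True)
accelerator = Accelerator(dataloader_config=dataloader_config)
# DataLoaders prepared by this accelerator move each batch to the device with
# non_blocking=True; pair this with DataLoader(pin_memory=True) for actual overlap.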