chenpangpang / transformers · Commits · 1a6fb930
"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "7e7f743481abff9bcabdf73047dffb7c1db9d18b"
Unverified commit 1a6fb930, authored Jun 21, 2023 by Zach Mueller, committed by GitHub on Jun 21, 2023
Clean up dist import (#24402)
parent 285a4801

Showing 1 changed file with 6 additions and 14 deletions
src/transformers/training_args.py  (+6, -14)  (view file @ 1a6fb930)
@@ -87,9 +87,9 @@ if is_torch_neuroncore_available(check_device=False):
             )
             import torch_xla.distributed.xla_backend as xbn
 
-            if not isinstance(torch.distributed.group.WORLD, xbn.ProcessGroupXla):
-                torch.distributed.init_process_group(backend="xla")
-                if not isinstance(torch.distributed.group.WORLD, xbn.ProcessGroupXla):
+            if not isinstance(dist.group.WORLD, xbn.ProcessGroupXla):
+                dist.init_process_group(backend="xla")
+                if not isinstance(dist.group.WORLD, xbn.ProcessGroupXla):
                     raise AssertionError("Failed to initialize torch.distributed process group using XLA backend.")
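The hunk above only shortens fully qualified torch.distributed.* calls to the shorter alias; a minimal sketch of that pattern, assuming the module already does `import torch.distributed as dist` near the top of the file (the helper name ensure_xla_process_group is hypothetical and not part of the commit):

import torch.distributed as dist  # assumed module-level alias that the rename relies on


def ensure_xla_process_group():
    # Hypothetical helper mirroring the hunk above: make sure the default
    # process group is an XLA group, initializing it with the "xla" backend
    # if it is not. Requires the torch_xla package to be installed.
    import torch_xla.distributed.xla_backend as xbn

    if not isinstance(dist.group.WORLD, xbn.ProcessGroupXla):
        dist.init_process_group(backend="xla")
        if not isinstance(dist.group.WORLD, xbn.ProcessGroupXla):
            raise AssertionError("Failed to initialize torch.distributed process group using XLA backend.")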
@@ -1716,11 +1716,7 @@ class TrainingArguments:
         if not is_sagemaker_mp_enabled():
             device = self.distributed_state.device
             self.local_rank = self.distributed_state.local_process_index
-        if (
-            torch.distributed.is_available()
-            and torch.distributed.is_initialized()
-            and self.parallel_mode != ParallelMode.DISTRIBUTED
-        ):
+        if dist.is_available() and dist.is_initialized() and self.parallel_mode != ParallelMode.DISTRIBUTED:
             logger.warning(
                 "torch.distributed process group is initialized, but parallel_mode != ParallelMode.DISTRIBUTED. "
                 "In order to use Torch DDP, launch your script with `python -m torch.distributed.launch"
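Besides the alias rename, this hunk collapses the multi-line condition into a single guard. A hedged sketch of that guard pattern, checking availability and initialization before querying any process-group state (the function name is illustrative, not from the commit):

import torch.distributed as dist


def distributed_world_size() -> int:
    # Fall back to a single-process world when no process group has been set up.
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size()
    return 1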
@@ -1963,10 +1959,8 @@ class TrainingArguments:
                     logger.debug(f"{self.process_index}: waiting for the {main_process_desc} to perform {desc}")
                     if is_torch_tpu_available():
                         xm.rendezvous(desc)
-                    elif is_sagemaker_dp_enabled():
-                        dist.barrier()
                     else:
-                        torch.distributed.barrier()
+                        dist.barrier()
                 yield
             finally:
                 if is_main_process:
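This hunk folds the SageMaker data-parallel branch into the plain dist.barrier() call, since both back ends are reached through the same alias. A simplified sketch of the surrounding "main process first" pattern, under that assumption and not the actual TrainingArguments implementation:

import contextlib

import torch.distributed as dist


@contextlib.contextmanager
def main_process_first(local_rank: int):
    # Non-main ranks block on a barrier before running the body; rank 0 runs
    # the body first, then hits the barrier in `finally` and releases them.
    is_main_process = local_rank == 0
    try:
        if not is_main_process:
            dist.barrier()
        yield
    finally:
        if is_main_process:
            dist.barrier()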
@@ -1974,10 +1968,8 @@ class TrainingArguments:
                     logger.debug(f"{self.process_index}: {main_process_desc} completed {desc}, releasing all replicas")
                     if is_torch_tpu_available():
                         xm.rendezvous(desc)
-                    elif is_sagemaker_dp_enabled():
-                        dist.barrier()
                     else:
-                        torch.distributed.barrier()
+                        dist.barrier()
         else:
             yield
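This final hunk mirrors the previous one on the releasing side of the barrier. For context, a hedged usage example of the sketch above; the dataset helper is hypothetical and only stands in for work that should run on one rank before the others read its output:

import os

local_rank = int(os.environ.get("LOCAL_RANK", 0))
with main_process_first(local_rank):
    # Runs on rank 0 first, then on the remaining ranks (e.g. reading the cache rank 0 just wrote).
    dataset = build_or_load_cached_dataset()  # hypothetical helper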