chenpangpang / transformers
Unverified commit 1420b5ff
Authored Jan 29, 2021 by Stas Bekman, committed by GitHub on Jan 29, 2021
Parent: 6bf94bc0

refactor deepspeed setup devices (#9880)
Showing 1 changed file with 15 additions and 15 deletions.

src/transformers/training_args.py (+15 / -15)
@@ -535,6 +535,20 @@ class TrainingArguments:
             self.local_rank = dist.get_local_rank()
             device = torch.device("cuda", self.local_rank)
             self._n_gpu = 1
+        elif self.deepspeed:
+            # deepspeed performs its own DDP internally, and requires the program to be started with:
+            # deepspeed ./program.py
+            # rather than:
+            # python -m torch.distributed.launch --nproc_per_node=2 ./program.py
+            from .integrations import is_deepspeed_available
+
+            if not is_deepspeed_available():
+                raise ImportError("--deepspeed requires deepspeed: `pip install deepspeed`.")
+            import deepspeed
+
+            deepspeed.init_distributed()
+            device = torch.device("cuda", self.local_rank)
+            self._n_gpu = 1
         elif self.local_rank == -1:
             # if n_gpu is > 1 we'll use nn.DataParallel.
             # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0`
@@ -549,21 +563,7 @@ class TrainingArguments:
         else:
             # Here, we'll use torch.distributed.
             # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
-            #
-            # deepspeed performs its own DDP internally, and requires the program to be started with:
-            # deepspeed ./program.py
-            # rather than:
-            # python -m torch.distributed.launch --nproc_per_node=2 ./program.py
-            if self.deepspeed:
-                from .integrations import is_deepspeed_available
-
-                if not is_deepspeed_available():
-                    raise ImportError("--deepspeed requires deepspeed: `pip install deepspeed`.")
-                import deepspeed
-
-                deepspeed.init_distributed()
-            else:
-                torch.distributed.init_process_group(backend="nccl")
+            torch.distributed.init_process_group(backend="nccl")
             device = torch.device("cuda", self.local_rank)
             self._n_gpu = 1
 
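For context, the sketch below mirrors the control flow _setup_devices follows after this commit: the DeepSpeed case becomes its own branch, checked before the local_rank == -1 case, instead of being nested inside the torch.distributed branch. This is a minimal, self-contained approximation and not the library code: setup_devices_sketch, the args namespace, and the simplified no_cuda / DataParallel branches are illustrative assumptions; the SageMaker branch visible in the surrounding context is omitted. Only the branch ordering and the deepspeed.init_distributed() / torch.distributed.init_process_group() calls mirror the diff above.

# Minimal sketch (not the transformers source) of the post-refactor branch order.
import importlib.util
from types import SimpleNamespace

import torch


def is_deepspeed_available():
    # Stand-in for transformers.integrations.is_deepspeed_available
    return importlib.util.find_spec("deepspeed") is not None


def setup_devices_sketch(args):
    """Return (device, n_gpu) following the branch order introduced by this commit."""
    if args.no_cuda:
        device = torch.device("cpu")
        n_gpu = 0
    elif args.deepspeed:
        # DeepSpeed performs its own DDP internally; the program must be started
        # with "deepspeed ./program.py", not torch.distributed.launch.
        if not is_deepspeed_available():
            raise ImportError("--deepspeed requires deepspeed: `pip install deepspeed`.")
        import deepspeed

        deepspeed.init_distributed()
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
    elif args.local_rank == -1:
        # Single-process path: nn.DataParallel over all visible GPUs when n_gpu > 1.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        # Plain torch.distributed path; DeepSpeed no longer branches inside here.
        torch.distributed.init_process_group(backend="nccl")
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
    return device, n_gpu


if __name__ == "__main__":
    # CPU-only invocation so the sketch runs without GPUs or DeepSpeed installed.
    print(setup_devices_sketch(SimpleNamespace(no_cuda=True, deepspeed=None, local_rank=-1)))

As the comments moved by this commit note, the DeepSpeed path still has to be launched with "deepspeed ./program.py" rather than "python -m torch.distributed.launch --nproc_per_node=2 ./program.py", since DeepSpeed performs its own DDP setup internally.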