Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
fairscale
Commits
a842a927
Unverified
Commit
a842a927
authored
Nov 18, 2020
by
Yuanyuan (Ana) Shen
Committed by
GitHub
Nov 18, 2020
Browse files
[feat] Add CPU support for pipe.py benchmarks (#188)
* Add CPU support for pipe.py benchmarks, CUDA-free
parent
f80b303c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
10 deletions
+18
-10
benchmarks/pipe.py
benchmarks/pipe.py
+17
-9
fairscale/nn/pipe/pipe.py
fairscale/nn/pipe/pipe.py
+1
-1
No files found.
benchmarks/pipe.py
View file @
a842a927
...
...
@@ -283,7 +283,9 @@ def train(lm_dataloader, model, criterion, optimizer, vocab_size, args):
num_params
=
reduce
(
operator
.
add
,
(
reduce
(
operator
.
mul
,
x
.
size
())
for
x
in
model
.
parameters
()))
if
model
.
group
:
total
=
torch
.
Tensor
([
num_params
]).
cuda
()
total
=
torch
.
Tensor
([
num_params
])
if
torch
.
cuda
.
is_available
():
total
=
total
.
cuda
()
torch
.
distributed
.
all_reduce
(
total
,
group
=
model
.
group
)
logging
.
info
(
f
"training model, #prams =
{
num_params
}
, group:
{
model
.
group
.
rank
()
}
, grank:"
...
...
@@ -305,6 +307,8 @@ def train(lm_dataloader, model, criterion, optimizer, vocab_size, args):
if
isinstance
(
model
,
DDP
):
model
=
model
.
module
if
not
torch
.
cuda
.
is_available
():
return
torch
.
device
(
"cpu"
)
if
model
.
devices
:
return
model
.
devices
[
0
]
else
:
...
...
@@ -313,6 +317,9 @@ def train(lm_dataloader, model, criterion, optimizer, vocab_size, args):
def
get_last_device
(
model
):
if
isinstance
(
model
,
DDP
):
model
=
model
.
module
if
not
torch
.
cuda
.
is_available
():
return
torch
.
device
(
"cpu"
)
if
model
.
devices
:
return
model
.
devices
[
-
1
]
else
:
...
...
@@ -491,8 +498,8 @@ def generate_balance(num_devices, num_layers):
def
make_model_and_data
(
args
,
device
,
new_data
:
bool
=
True
):
device
=
torch
.
device
(
"cuda"
)
if
torch
.
cuda
.
is_available
()
else
torch
.
device
(
"cpu"
)
if
new_data
:
device
=
torch
.
device
(
"cuda"
)
vocab_size
=
10000
model
,
criterion
,
optimizer
,
scaler
=
make_model
(
args
,
device
,
vocab_size
)
lm_dataset
=
BenchmarkLMDataset
()
...
...
@@ -507,7 +514,6 @@ def make_model_and_data(args, device, new_data: bool = True):
"vocab_size"
:
vocab_size
,
}
else
:
device
=
torch
.
device
(
"cuda"
)
data
=
get_data
(
device
)
ntokens
,
train_data
,
val_data
,
test_data
=
data
model
,
criterion
,
optimizer
,
scaler
=
make_model
(
args
,
device
,
ntokens
)
...
...
@@ -520,10 +526,10 @@ def make_model_and_data(args, device, new_data: bool = True):
def
bench_single_process
(
args
):
num_devices
=
torch
.
cuda
.
device_count
()
num_devices
=
torch
.
cuda
.
device_count
()
if
torch
.
cuda
.
is_available
()
else
1
assert
num_devices
>
0
init_random_seed
(
0
)
device
=
torch
.
device
(
"cuda"
)
device
=
torch
.
device
(
"cuda"
)
if
torch
.
cuda
.
is_available
()
else
torch
.
device
(
"cpu"
)
new_data
=
True
...
...
@@ -557,12 +563,13 @@ def run_mp_worker(args, available_workers):
style
=
Pipe
.
AsyncSchedule
,
chunks
=
args
.
chunks
,
worker_map
=
get_worker_map
(),
input_device
=
torch
.
cuda
.
current_
device
(),
input_device
=
torch
.
device
(
"cuda"
)
if
torch
.
cuda
.
is_available
()
else
torch
.
device
(
"cpu"
),
pipelined_backward
=
args
.
pipelined_backward
,
checkpoint
=
args
.
checkpoint
,
# loss_fn=blob["criterion"],
).
cuda
()
)
if
torch
.
cuda
.
is_available
():
p
=
p
.
cuda
()
if
args
.
all_at_once
and
p
.
pipeline
:
print
(
f
"running all at once"
)
p
.
pipeline
.
all_at_once
=
True
...
...
@@ -678,7 +685,8 @@ parser.set_defaults(pipelined_backward=True)
if
__name__
==
"__main__"
:
args
=
parser
.
parse_args
()
# bench_multi_process(args, all_at_once=True)
bench_multi_process
(
args
,
all_at_once
=
True
)
if
args
.
no_mpi
or
"OMPI_COMM_WORLD_RANK"
not
in
os
.
environ
:
print
(
f
"Running benchmark with args:
{
args
}
"
)
bench_single_process
(
args
)
...
...
fairscale/nn/pipe/pipe.py
View file @
a842a927
...
...
@@ -736,7 +736,7 @@ class Pipe(Module):
from
.phony
import
get_phony
phony
=
get_phony
(
torch
.
device
(
torch
.
cuda
.
current_device
()),
requires_grad
=
True
)
phony
=
get_phony
(
torch
.
device
(
torch
.
cuda
.
current_device
()
if
torch
.
cuda
.
is_available
()
else
"cpu"
),
requires_grad
=
True
)
output
=
PipelinedBackwardPass
.
apply
(
output
,
batches
,
phony
,
True
)
# self.retain_graph)
else
:
output
=
microbatch
.
gather
(
batches
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment