Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
143289dc
Unverified
Commit
143289dc
authored
Jan 04, 2021
by
Stas Bekman
Committed by
GitHub
Jan 04, 2021
Browse files
[test_model_parallelization] multiple fixes (#9354)
parent
086718ac
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
20 deletions
+26
-20
tests/test_modeling_common.py
tests/test_modeling_common.py
+26
-20
No files found.
tests/test_modeling_common.py
View file @
143289dc
...
...
@@ -14,6 +14,7 @@
# limitations under the License.
import
copy
import
gc
import
inspect
import
os.path
import
random
...
...
@@ -1081,15 +1082,15 @@ class ModelTesterMixin:
if
not
self
.
test_model_parallel
:
return
import
subprocess
# a candidate for testing_utils
def
get_current_gpu_memory_use
():
run_process
=
subprocess
.
Popen
(
"nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader"
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
)
""" returns a list of cuda memory allocations per GPU in MBs"""
per_device_memory
=
[]
for
id
in
range
(
torch
.
cuda
.
device_count
()):
with
torch
.
cuda
.
device
(
id
):
per_device_memory
.
append
(
torch
.
cuda
.
memory_allocated
()
>>
20
)
memory_usage
=
run_process
.
stdout
.
read
().
decode
(
"utf-8"
).
strip
()
per_device_memory
=
[
int
(
memory
)
for
memory
in
memory_usage
.
split
(
"
\n
"
)]
return
per_device_memory
# Needs a large model to see the difference.
...
...
@@ -1098,39 +1099,44 @@ class ModelTesterMixin:
for
model_class
in
self
.
all_parallelizable_model_classes
:
torch
.
cuda
.
empty_cache
()
# Retrieve initial memory usage (should be close to 0)
initial_memory
=
get_current_gpu_memory_use
()
# 1. single gpu memory load + unload + memory measurements
# Retrieve initial memory usage (can easily be ~0.6-1.5GB if cuda-kernels have been preloaded by previous tests)
memory_at_start
=
get_current_gpu_memory_use
()
# Put model on device
model
=
model_class
(
config
.
from_pretrained
(
"gpt2"
)
)
# Put model on device
0 and take a memory snapshot
model
=
model_class
(
config
)
model
.
to
(
"cuda:0"
)
# Retrieve the memory after the model is put on the device
memory_after_model_load
=
get_current_gpu_memory_use
()
# The memory use on device 0 should be higher than it was initially.
self
.
assertGreater
(
memory_after_model_load
[
0
],
memory_at_start
[
0
])
del
model
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
# The memory use on that device should be higher than it was initially.
self
.
assertGreater
(
memory_after_model_load
[
0
],
initial_memory
[
0
])
# 2. MP test
# it's essential to re-calibrate the usage before the next stage
memory_at_start
=
get_current_gpu_memory_use
()
# Spread model layers over multiple devices
model
=
model_class
(
config
.
from_pretrained
(
"gpt2"
)
)
model
=
model_class
(
config
)
model
.
parallelize
()
memory_after_parallelization
=
get_current_gpu_memory_use
()
# Assert that the memory use on all devices is higher than it was when loaded only on CPU
for
n
in
range
(
torch
.
cuda
.
device_count
()):
self
.
assertGreater
(
memory_after_parallelization
[
n
],
initial_memory
[
n
])
self
.
assertGreater
(
memory_after_parallelization
[
n
],
memory_at_start
[
n
])
# Assert that the memory use of
the first
device is lower than it was when the entire model was loaded on it
# Assert that the memory use of device
0
is lower than it was when the entire model was loaded on it
self
.
assertLess
(
memory_after_parallelization
[
0
],
memory_after_model_load
[
0
])
# Assert that the memory use of
the second
device is higher than it was when the entire model was loaded
# on
the other device.
# Assert that the memory use of device
1
is higher than it was when the entire model was loaded
# on
device 0 and device 1 wasn't used at all
self
.
assertGreater
(
memory_after_parallelization
[
1
],
memory_after_model_load
[
1
])
del
model
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
require_torch_multi_gpu
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment