Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
b1a8aa94
Unverified
Commit
b1a8aa94
authored
Jun 09, 2021
by
Stas Bekman
Committed by
GitHub
Jun 09, 2021
Browse files
[test] support more than 2 gpus (#12074)
* support more than 2 gpus * style
parent
d3eacbb8
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
2 deletions
+5
-2
tests/test_trainer.py
tests/test_trainer.py
+5
-2
No files found.
tests/test_trainer.py
View file @
b1a8aa94
...
@@ -34,6 +34,7 @@ from transformers.testing_utils import (
...
@@ -34,6 +34,7 @@ from transformers.testing_utils import (
PASS
,
PASS
,
USER
,
USER
,
TestCasePlus
,
TestCasePlus
,
get_gpu_count
,
get_tests_dir
,
get_tests_dir
,
is_staging_test
,
is_staging_test
,
require_datasets
,
require_datasets
,
...
@@ -1113,15 +1114,17 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
...
@@ -1113,15 +1114,17 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# this is a sensitive test so let's keep debugging printouts in place for quick diagnosis.
# this is a sensitive test so let's keep debugging printouts in place for quick diagnosis.
# it's using pretty large safety margins, but small enough to detect broken functionality.
# it's using pretty large safety margins, but small enough to detect broken functionality.
debug
=
0
debug
=
0
n_gpus
=
get_gpu_count
()
bs
=
8
bs
=
8
eval_len
=
16
*
n_gpus
# make the params somewhat big so that there will be enough RAM consumed to be able to
# make the params somewhat big so that there will be enough RAM consumed to be able to
# measure things. We should get about 64KB for a+b in fp32
# measure things. We should get about 64KB for a+b in fp32
a
=
torch
.
ones
(
1000
,
bs
)
+
0.001
a
=
torch
.
ones
(
1000
,
bs
)
+
0.001
b
=
torch
.
ones
(
1000
,
bs
)
-
0.001
b
=
torch
.
ones
(
1000
,
bs
)
-
0.001
# 1. with mem metrics enabled
# 1. with mem metrics enabled
trainer
=
get_regression_trainer
(
a
=
a
,
b
=
b
,
eval_len
=
16
,
skip_memory_metrics
=
False
)
trainer
=
get_regression_trainer
(
a
=
a
,
b
=
b
,
eval_len
=
eval_len
,
skip_memory_metrics
=
False
)
metrics
=
trainer
.
evaluate
()
metrics
=
trainer
.
evaluate
()
del
trainer
del
trainer
gc
.
collect
()
gc
.
collect
()
...
@@ -1142,7 +1145,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
...
@@ -1142,7 +1145,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
self
.
assertLess
(
fp32_eval
,
5_000
)
self
.
assertLess
(
fp32_eval
,
5_000
)
# 2. with mem metrics disabled
# 2. with mem metrics disabled
trainer
=
get_regression_trainer
(
a
=
a
,
b
=
b
,
eval_len
=
16
,
fp16_full_eval
=
True
,
skip_memory_metrics
=
False
)
trainer
=
get_regression_trainer
(
a
=
a
,
b
=
b
,
eval_len
=
eval_len
,
fp16_full_eval
=
True
,
skip_memory_metrics
=
False
)
metrics
=
trainer
.
evaluate
()
metrics
=
trainer
.
evaluate
()
fp16_init
=
metrics
[
"init_mem_gpu_alloc_delta"
]
fp16_init
=
metrics
[
"init_mem_gpu_alloc_delta"
]
fp16_eval
=
metrics
[
"eval_mem_gpu_alloc_delta"
]
fp16_eval
=
metrics
[
"eval_mem_gpu_alloc_delta"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment