Unverified commit d787935a
Authored Nov 05, 2020 by Stas Bekman; committed by GitHub, Nov 05, 2020

[s2s] test_distributed_eval (#8315)

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

Parent: 04e442d5
Showing 4 changed files with 56 additions and 8 deletions (+56 -8)
docs/source/testing.rst                               +2  -1
examples/seq2seq/test_finetune_trainer.py             +3  -6
examples/seq2seq/test_seq2seq_examples_multi_gpu.py   +35 -1
src/transformers/testing_utils.py                     +16 -0
docs/source/testing.rst

@@ -450,7 +450,8 @@ Inside tests:

   .. code-block:: bash

-    torch.cuda.device_count()
+    from transformers.testing_utils import get_gpu_count
+    n_gpu = get_gpu_count()  # works with torch and tf
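As a hedged illustration of the documented helper (not part of this commit), a test built on TestCasePlus could use get_gpu_count to skip itself on CPU-only machines; the class and method names below are hypothetical:

    from transformers.testing_utils import TestCasePlus, get_gpu_count

    class ExampleGpuTest(TestCasePlus):  # hypothetical test class
        def test_runs_on_available_gpus(self):  # hypothetical test name
            n_gpu = get_gpu_count()  # 0 on CPU-only machines; works with torch and tf
            if n_gpu == 0:
                self.skipTest("no GPU available")
            # ... launch work across n_gpu devices here ...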
examples/seq2seq/test_finetune_trainer.py

@@ -2,9 +2,9 @@ import os
 import sys
 from unittest.mock import patch

-from transformers import BertTokenizer, EncoderDecoderModel, is_torch_available
+from transformers import BertTokenizer, EncoderDecoderModel
 from transformers.file_utils import is_datasets_available
-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, slow
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, get_gpu_count, slow
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import set_seed

@@ -13,9 +13,6 @@ from .seq2seq_trainer import Seq2SeqTrainer
 from .test_seq2seq_examples import MBART_TINY

-if is_torch_available():
-    import torch
-
 set_seed(42)
 MARIAN_MODEL = "sshleifer/student_marian_en_ro_6_1"

@@ -196,7 +193,7 @@ class TestFinetuneTrainer(TestCasePlus):
         """.split()
         # --eval_beams  2

-        n_gpu = torch.cuda.device_count()
+        n_gpu = get_gpu_count()
         if n_gpu > 1:
             distributed_args = f"""
                 -m torch.distributed.launch
examples/seq2seq/test_seq2seq_examples_multi_gpu.py

@@ -3,7 +3,14 @@
 import os
 import sys

-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multigpu
+from transformers.testing_utils import (
+    TestCasePlus,
+    execute_subprocess_async,
+    get_gpu_count,
+    require_torch_gpu,
+    require_torch_multigpu,
+    slow,
+)

 from .test_seq2seq_examples import CHEAP_ARGS, make_test_data_dir
 from .utils import load_json

@@ -80,3 +87,30 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus):
         self.assertEqual(len(metrics["test"]), 1)
         desired_n_evals = int(args_d["max_epochs"] * (1 / args_d["val_check_interval"]) / 2 + 1)
         self.assertEqual(len(metrics["val"]), desired_n_evals)
+
+    @slow
+    @require_torch_gpu
+    def test_distributed_eval(self):
+        output_dir = self.get_auto_remove_tmp_dir()
+        args = f"""
+            --model_name Helsinki-NLP/opus-mt-en-ro
+            --save_dir {output_dir}
+            --data_dir test_data/wmt_en_ro
+            --num_beams 2
+            --task translation
+        """.split()
+
+        # we want this test to run even if there is only one GPU, but if there are more we use them all
+        n_gpu = get_gpu_count()
+        distributed_args = f"""
+            -m torch.distributed.launch
+            --nproc_per_node={n_gpu}
+            {self.test_file_dir}/run_distributed_eval.py
+        """.split()
+        cmd = [sys.executable] + distributed_args + args
+        execute_subprocess_async(cmd, env=self.get_env())
+        metrics_save_path = os.path.join(output_dir, "test_bleu.json")
+        metrics = load_json(metrics_save_path)
+        # print(metrics)
+        self.assertGreaterEqual(metrics["bleu"], 25)
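For reference, here is a hedged sketch (not part of this commit) of launching the same evaluation outside the test harness. The output directory and the relative paths to run_distributed_eval.py and the WMT en-ro test data are assumptions; adjust them to your checkout.

    import os
    import subprocess
    import sys

    from transformers.testing_utils import get_gpu_count

    n_gpu = get_gpu_count()  # the test above assumes at least one visible GPU
    output_dir = "/tmp/distributed_eval"  # hypothetical output location
    os.makedirs(output_dir, exist_ok=True)

    cmd = [
        sys.executable,
        "-m", "torch.distributed.launch",
        f"--nproc_per_node={n_gpu}",
        "examples/seq2seq/run_distributed_eval.py",  # assumed path relative to the repo root
        "--model_name", "Helsinki-NLP/opus-mt-en-ro",
        "--save_dir", output_dir,
        "--data_dir", "examples/seq2seq/test_data/wmt_en_ro",  # assumed path relative to the repo root
        "--num_beams", "2",
        "--task", "translation",
    ]
    subprocess.run(cmd, check=True)
    # The test above then loads <save_dir>/test_bleu.json and checks the BLEU score.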
src/transformers/testing_utils.py

@@ -297,6 +297,22 @@ def require_ray(test_case):
     return test_case

+def get_gpu_count():
+    """
+    Return the number of available gpus (regardless of whether torch or tf is used)
+    """
+    if _torch_available:
+        import torch
+
+        return torch.cuda.device_count()
+    elif _tf_available:
+        import tensorflow as tf
+
+        return len(tf.config.list_physical_devices("GPU"))
+    else:
+        return 0
+
+
 def get_tests_dir(append_path=None):
     """
     Args:
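A quick, illustrative way to exercise the new helper from a Python shell (not part of this commit):

    from transformers.testing_utils import get_gpu_count

    # 0 on a CPU-only machine; otherwise the number of GPUs visible to torch or tf
    print(get_gpu_count())

Importing torch or tensorflow lazily inside the function keeps the helper usable when only one of the two frameworks is installed.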