chenpangpang / transformers · Commits · 99eb9b52

Unverified commit 99eb9b52, authored Jul 21, 2022 by Zachary Mueller, committed by GitHub on Jul 21, 2022.
Fix `no_trainer` CI (#18242)
* Fix all tests
parent 561b9a8c
Showing 2 changed files with 46 additions and 14 deletions (+46, -14):

  examples/pytorch/test_accelerate_examples.py   +22  -13
  src/transformers/testing_utils.py              +24  -1
examples/pytorch/test_accelerate_examples.py  (view file @ 99eb9b52)
@@ -19,14 +19,14 @@ import json
 import logging
 import os
 import shutil
-import subprocess
 import sys
 import tempfile
+from unittest import mock
 
 import torch
 
 from accelerate.utils import write_basic_config
 
-from transformers.testing_utils import TestCasePlus, get_gpu_count, slow, torch_device
+from transformers.testing_utils import TestCasePlus, get_gpu_count, run_command, slow, torch_device
 from transformers.utils import is_apex_available
@@ -75,6 +75,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
     def tearDownClass(cls):
         shutil.rmtree(cls.tmpdir)
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_glue_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -94,12 +95,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
         if is_cuda_and_apex_available():
             testargs.append("--fp16")
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_accuracy"], 0.75)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer")))
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_clm_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -120,12 +122,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             # Skipping because there are not enough batches to train the model + would need a drop_last to work.
             return
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertLess(result["perplexity"], 100)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer")))
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_mlm_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -139,12 +142,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --with_tracking
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertLess(result["perplexity"], 42)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer")))
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_ner_no_trainer(self):
         # with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
         epochs = 7 if get_gpu_count() > 1 else 2
@@ -165,13 +169,14 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --with_tracking
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_accuracy"], 0.75)
         self.assertLess(result["train_loss"], 0.5)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer")))
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_squad_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -190,7 +195,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --with_tracking
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         # Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics.
         self.assertGreaterEqual(result["eval_f1"], 28)
@@ -198,6 +203,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer")))
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_swag_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -214,12 +220,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --with_tracking
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_accuracy"], 0.8)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer")))
 
     @slow
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_summarization_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -237,7 +244,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --with_tracking
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_rouge1"], 10)
         self.assertGreaterEqual(result["eval_rouge2"], 2)
@@ -247,6 +254,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer")))
 
     @slow
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_translation_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -268,7 +276,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --with_tracking
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_bleu"], 30)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
@@ -292,10 +300,11 @@ class ExamplesTestsNoTrainer(TestCasePlus):
             --checkpointing_steps epoch
         """.split()
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10)
 
+    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
     def test_run_image_classification_no_trainer(self):
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
@@ -316,9 +325,9 @@ class ExamplesTestsNoTrainer(TestCasePlus):
         if is_cuda_and_apex_available():
             testargs.append("--fp16")
 
-        _ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
+        run_command(self._launch_args + testargs)
         result = get_results(tmp_dir)
         # The base model scores a 25%
-        self.assertGreaterEqual(result["eval_accuracy"], 0.625)
+        self.assertGreaterEqual(result["eval_accuracy"], 0.6)
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1")))
         self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer")))
src/transformers/testing_utils.py  (view file @ 99eb9b52)
@@ -20,6 +20,7 @@ import os
 import re
 import shlex
 import shutil
+import subprocess
 import sys
 import tempfile
 import unittest
@@ -27,7 +28,7 @@ from collections.abc import Mapping
 from distutils.util import strtobool
 from io import StringIO
 from pathlib import Path
-from typing import Iterator, Union
+from typing import Iterator, List, Union
 from unittest import mock
 
 from transformers import logging as transformers_logging
@@ -1561,3 +1562,25 @@ def to_2tuple(x):
     if isinstance(x, collections.abc.Iterable):
         return x
     return (x, x)
+
+
+# These utils relate to ensuring the right error message is received when running scripts
+class SubprocessCallException(Exception):
+    pass
+
+
+def run_command(command: List[str], return_stdout=False):
+    """
+    Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
+    if an error occured while running `command`
+    """
+    try:
+        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
+        if return_stdout:
+            if hasattr(output, "decode"):
+                output = output.decode("utf-8")
+            return output
+    except subprocess.CalledProcessError as e:
+        raise SubprocessCallException(
+            f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
+        ) from e
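A short usage sketch of the new helper, assuming both `run_command` and the module-level `SubprocessCallException` are imported from `transformers.testing_utils` (the test file above imports `run_command`); the commands themselves are only illustrations:

import sys

from transformers.testing_utils import SubprocessCallException, run_command

# On success, stdout can optionally be captured and returned as a decoded string.
out = run_command([sys.executable, "-c", "print('hello from a subprocess')"], return_stdout=True)
print(out)  # "hello from a subprocess\n"

# On failure, the child's combined stdout/stderr is re-raised inside a
# SubprocessCallException, so the CI log shows the real error message.
try:
    run_command([sys.executable, "-c", "raise ValueError('boom')"])
except SubprocessCallException as e:
    print(e)  # Command `...` failed with the following error: ... ValueError: boom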