Commit 7822cd38 (unverified)
Authored May 14, 2020 by Sam Shleifer; committed by GitHub on May 14, 2020

[tests] make pipelines tests faster with smaller models (#4238)

Covers torch and TF. Also fixes a failing @slow test.
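The core idea, as a hedged standalone sketch (the checkpoint below is one of the tiny, randomly initialized models this diff switches the tests to; its predictions are meaningless, which is fine because the tests only assert output structure):

from transformers import pipeline

# Tiny randomly-initialized checkpoint referenced in this diff: it downloads
# and runs in seconds, unlike the full-size models the tests pulled before.
nlp = pipeline(
    task="sentiment-analysis",
    model="sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
)

# The label/score values are garbage (random weights); the tests only check
# that each result dict carries the mandatory keys.
result = nlp("HuggingFace is solving NLP one commit at a time.")[0]
assert {"label", "score"} <= set(result.keys())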
Parent: 448c4672

Showing 2 changed files with 134 additions and 247 deletions:
  src/transformers/pipelines.py  (+3, -3)
  tests/test_pipelines.py        (+131, -244)
src/transformers/pipelines.py
@@ -1513,7 +1513,7 @@ class TranslationPipeline(Pipeline):
         return results
 
-# Register all the supported task here
+# Register all the supported tasks here
 SUPPORTED_TASKS = {
     "feature-extraction": {
         "impl": FeatureExtractionPipeline,
@@ -1576,9 +1576,9 @@ SUPPORTED_TASKS = {
         "tf": TFAutoModelWithLMHead if is_tf_available() else None,
         "pt": AutoModelWithLMHead if is_torch_available() else None,
         "default": {
-            "model": {"pt": "bart-large-cnn", "tf": None},
+            "model": {"pt": "bart-large-cnn", "tf": "t5-small"},
             "config": None,
-            "tokenizer": ("bart-large-cnn", {"use_fast": False}),
+            "tokenizer": {"pt": ("bart-large-cnn", {"use_fast": False}), "tf": "t5-small"},
         },
     },
     "translation_en_to_fr": {
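The summarization entry now keys both the default model and the default tokenizer by framework. A minimal sketch of how such a dict could be resolved; targeted_task and framework are illustrative local names, not necessarily the library's internals:

# Per-framework defaults as they appear in the diff above.
targeted_task = {
    "model": {"pt": "bart-large-cnn", "tf": "t5-small"},
    "config": None,
    "tokenizer": {"pt": ("bart-large-cnn", {"use_fast": False}), "tf": "t5-small"},
}

framework = "tf"  # or "pt"
model = targeted_task["model"][framework]

tokenizer = targeted_task["tokenizer"]
if isinstance(tokenizer, dict):
    # New layout: the default tokenizer can differ per framework too.
    tokenizer = tokenizer[framework]

print(model, tokenizer)  # -> t5-small t5-small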
tests/test_pipelines.py
@@ -2,94 +2,41 @@ import unittest
 from typing import Iterable, List, Optional
 
 from transformers import pipeline
-from transformers.pipelines import DefaultArgumentHandler, Pipeline
+from transformers.pipelines import SUPPORTED_TASKS, DefaultArgumentHandler, Pipeline
 from .utils import require_tf, require_torch, slow
 
-QA_FINETUNED_MODELS = [
-    (("bert-base-uncased", {"use_fast": False}), "bert-large-uncased-whole-word-masking-finetuned-squad", None),
-    (("distilbert-base-cased-distilled-squad", {"use_fast": False}), "distilbert-base-cased-distilled-squad", None),
-]
-
-TF_QA_FINETUNED_MODELS = [
-    (("bert-base-uncased", {"use_fast": False}), "bert-large-uncased-whole-word-masking-finetuned-squad", None),
-    (("distilbert-base-cased-distilled-squad", {"use_fast": False}), "distilbert-base-cased-distilled-squad", None),
-]
-
-TF_NER_FINETUNED_MODELS = {
-    (
-        "bert-base-cased",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-    )
-}
-
-NER_FINETUNED_MODELS = {
-    (
-        "bert-base-cased",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-    )
-}
+NER_FINETUNED_MODELS = ["sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"]
 
-FEATURE_EXTRACT_FINETUNED_MODELS = {
-    ("bert-base-cased", "bert-base-cased", None),
-    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crash for TF2
-    ("distilbert-base-cased", "distilbert-base-cased", None),
-}
-
-TF_FEATURE_EXTRACT_FINETUNED_MODELS = {
-    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crash for TF2
-    ("distilbert-base-cased", "distilbert-base-cased", None),
-}
-
-TF_TEXT_CLASSIF_FINETUNED_MODELS = {
-    (
-        "bert-base-uncased",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-    )
-}
+FEATURE_EXTRACT_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-cased"]
 
-TEXT_CLASSIF_FINETUNED_MODELS = {
-    # xlnet-base-cased disabled for now, since it crashes TF2
-    (
-        "distilbert-base-cased",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-    )
-}
+TEXT_CLASSIF_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"]
 
-TEXT_GENERATION_FINETUNED_MODELS = {
-    ("gpt2", "gpt2"),
-    ("xlnet-base-cased", "xlnet-base-cased"),
-}
+TEXT_GENERATION_FINETUNED_MODELS = ["sshleifer/tiny-ctrl"]
 
-TF_TEXT_GENERATION_FINETUNED_MODELS = {
-    ("gpt2", "gpt2"),
-    ("xlnet-base-cased", "xlnet-base-cased"),
-}
+FILL_MASK_FINETUNED_MODELS = ["sshleifer/tiny-distilroberta-base"]
+LARGE_FILL_MASK_FINETUNED_MODELS = ["distilroberta-base"]  # @slow
 
-FILL_MASK_FINETUNED_MODELS = [
-    (("distilroberta-base", {"use_fast": False}), "distilroberta-base", None),
-]
-
-TF_FILL_MASK_FINETUNED_MODELS = [
-    (("distilroberta-base", {"use_fast": False}), "distilroberta-base", None),
-]
-
-SUMMARIZATION_FINETUNED_MODELS = {
-    ("sshleifer/bart-tiny-random", "bart-large-cnn"),
-    ("patrickvonplaten/t5-tiny-random", "t5-small"),
-}
-TF_SUMMARIZATION_FINETUNED_MODELS = {("patrickvonplaten/t5-tiny-random", "t5-small")}
+SUMMARIZATION_FINETUNED_MODELS = ["sshleifer/bart-tiny-random", "patrickvonplaten/t5-tiny-random"]
+TF_SUMMARIZATION_FINETUNED_MODELS = ["patrickvonplaten/t5-tiny-random"]
 
-TRANSLATION_FINETUNED_MODELS = {
-    ("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_de"),
-    ("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_ro"),
-}
-TF_TRANSLATION_FINETUNED_MODELS = {("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_fr")}
+TRANSLATION_FINETUNED_MODELS = [
+    ("patrickvonplaten/t5-tiny-random", "translation_en_to_de"),
+    ("patrickvonplaten/t5-tiny-random", "translation_en_to_ro"),
+]
+TF_TRANSLATION_FINETUNED_MODELS = [("patrickvonplaten/t5-tiny-random", "translation_en_to_fr")]
+
+expected_fill_mask_result = [
+    [
+        {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
+        {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
+    ],
+    [
+        {"sequence": "<s> The largest city in France is Paris</s>", "score": 0.3185044229030609, "token": 2201},
+        {"sequence": "<s> The largest city in France is Lyon</s>", "score": 0.21112334728240967, "token": 12790},
+    ],
+]
 
 
 class DefaultArgumentHandlerTestCase(unittest.TestCase):
     def setUp(self) -> None:
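How a tiny checkpoint like the sshleifer/tiny-* ones above gets built is not part of this diff; a hedged sketch with illustrative (assumed) dimensions:

from transformers import DistilBertConfig, DistilBertModel

# Shrink every dimension; these numbers are guesses for illustration, not the
# ones actually used for the sshleifer/tiny-* checkpoints.
config = DistilBertConfig(dim=32, hidden_dim=37, n_layers=2, n_heads=2)
model = DistilBertModel(config)  # randomly initialized, only a few hundred KB

# The saved directory can then be loaded by pipeline(...) via its path, or by
# model id once uploaded.
model.save_pretrained("./tiny-distilbert")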
@@ -168,8 +115,8 @@ class MonoColumnInputTestCase(unittest.TestCase):
         self,
         nlp: Pipeline,
         valid_inputs: List,
-        invalid_inputs: List,
         output_keys: Iterable[str],
+        invalid_inputs: List = [None],
         expected_multi_result: Optional[List] = None,
         expected_check_keys: Optional[List[str]] = None,
     ):
@@ -206,198 +153,180 @@ class MonoColumnInputTestCase(unittest.TestCase):
         self.assertRaises(Exception, nlp, invalid_inputs)
 
     @require_torch
-    def test_ner(self):
+    def test_torch_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in NER_FINETUNED_MODELS:
+            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_tf
     def test_tf_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TF_NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in NER_FINETUNED_MODELS:
+            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_torch
-    def test_sentiment_analysis(self):
+    def test_torch_sentiment_analysis(self):
         mandatory_keys = {"label", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
+            nlp = pipeline(task="sentiment-analysis", model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_tf
     def test_tf_sentiment_analysis(self):
         mandatory_keys = {"label", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
+            nlp = pipeline(task="sentiment-analysis", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_torch
-    def test_feature_extraction(self):
+    def test_torch_feature_extraction(self):
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+        for model_name in FEATURE_EXTRACT_FINETUNED_MODELS:
+            nlp = pipeline(task="feature-extraction", model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
 
     @require_tf
     def test_tf_feature_extraction(self):
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+        for model_name in FEATURE_EXTRACT_FINETUNED_MODELS:
+            nlp = pipeline(task="feature-extraction", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
 
     @require_torch
-    def test_fill_mask(self):
+    def test_torch_fill_mask(self):
         mandatory_keys = {"sequence", "score", "token"}
         valid_inputs = [
             "My name is <mask>",
             "The largest city in France is <mask>",
         ]
-        invalid_inputs = [None]
-        expected_multi_result = [
-            [
-                {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
-                {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
-            ],
-            [
-                {"sequence": "<s> The largest city in France is Paris</s>", "score": 0.3185044229030609, "token": 2201},
-                {"sequence": "<s> The largest city in France is Lyon</s>", "score": 0.21112334728240967, "token": 12790},
-            ],
-        ]
-        for tokenizer, model, config in FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
-            self._test_mono_column_pipeline(
-                nlp,
-                valid_inputs,
-                invalid_inputs,
-                mandatory_keys,
-                expected_multi_result=expected_multi_result,
-                expected_check_keys=["sequence"],
-            )
+        for model_name in FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt", topk=2,)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, expected_check_keys=["sequence"])
 
     @require_tf
     def test_tf_fill_mask(self):
         mandatory_keys = {"sequence", "score", "token"}
         valid_inputs = [
             "My name is <mask>",
             "The largest city in France is <mask>",
         ]
-        invalid_inputs = [None]
-        expected_multi_result = [
-            [
-                {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
-                {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
-            ],
-            [
-                {"sequence": "<s> The largest city in France is Paris</s>", "score": 0.3185044229030609, "token": 2201},
-                {"sequence": "<s> The largest city in France is Lyon</s>", "score": 0.21112334728240967, "token": 12790},
-            ],
-        ]
-        for tokenizer, model, config in TF_FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, framework="tf", topk=2)
-            self._test_mono_column_pipeline(
-                nlp,
-                valid_inputs,
-                invalid_inputs,
-                mandatory_keys,
-                expected_multi_result=expected_multi_result,
-                expected_check_keys=["sequence"],
-            )
+        for model_name in FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", topk=2,)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, expected_check_keys=["sequence"])
+
+    @require_torch
+    @slow
+    def test_torch_fill_mask_results(self):
+        mandatory_keys = {"sequence", "score", "token"}
+        valid_inputs = [
+            "My name is <mask>",
+            "The largest city in France is <mask>",
+        ]
+        for model_name in LARGE_FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt", topk=2,)
+            self._test_mono_column_pipeline(
+                nlp,
+                valid_inputs,
+                mandatory_keys,
+                expected_multi_result=expected_fill_mask_result,
+                expected_check_keys=["sequence"],
+            )
+
+    @require_tf
+    @slow
+    def test_tf_fill_mask_results(self):
+        mandatory_keys = {"sequence", "score", "token"}
+        valid_inputs = [
+            "My name is <mask>",
+            "The largest city in France is <mask>",
+        ]
+        for model_name in LARGE_FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", topk=2)
+            self._test_mono_column_pipeline(
+                nlp,
+                valid_inputs,
+                mandatory_keys,
+                expected_multi_result=expected_fill_mask_result,
+                expected_check_keys=["sequence"],
+            )
 
     @require_torch
-    def test_summarization(self):
+    def test_torch_summarization(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["summary_text"]
-        for model, tokenizer in SUMMARIZATION_FINETUNED_MODELS:
-            nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model in SUMMARIZATION_FINETUNED_MODELS:
+            nlp = pipeline(task="summarization", model=model, tokenizer=model)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs=invalid_inputs)
 
     @require_tf
     def test_tf_summarization(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["summary_text"]
-        for model, tokenizer in TF_SUMMARIZATION_FINETUNED_MODELS:
-            nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model_name in TF_SUMMARIZATION_FINETUNED_MODELS:
+            nlp = pipeline(task="summarization", model=model_name, tokenizer=model_name, framework="tf",)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs=invalid_inputs)
 
     @require_torch
-    def test_translation(self):
+    def test_torch_translation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["translation_text"]
-        for model, tokenizer, task in TRANSLATION_FINETUNED_MODELS:
-            nlp = pipeline(task=task, model=model, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model_name, task in TRANSLATION_FINETUNED_MODELS:
+            nlp = pipeline(task=task, model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs)
 
     @require_tf
+    @slow
     def test_tf_translation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["translation_text"]
-        for model, tokenizer, task in TF_TRANSLATION_FINETUNED_MODELS:
-            nlp = pipeline(task=task, model=model, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model, task in TF_TRANSLATION_FINETUNED_MODELS:
+            nlp = pipeline(task=task, model=model, tokenizer=model, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs=invalid_inputs)
 
     @require_torch
-    def test_text_generation(self):
+    def test_torch_text_generation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
-        invalid_inputs = [None]
-        for model, tokenizer in TEXT_GENERATION_FINETUNED_MODELS:
-            nlp = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="pt")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, {},
-            )
+        for model_name in TEXT_GENERATION_FINETUNED_MODELS:
+            nlp = pipeline(task="text-generation", model=model_name, tokenizer=model_name, framework="pt")
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
 
     @require_tf
     def test_tf_text_generation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
-        invalid_inputs = [None]
-        for model, tokenizer in TF_TEXT_GENERATION_FINETUNED_MODELS:
-            nlp = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, {},
-            )
+        for model_name in TEXT_GENERATION_FINETUNED_MODELS:
+            nlp = pipeline(task="text-generation", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
+
+
+QA_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-cased-distilled-squad"]
 
 
-class MultiColumnInputTestCase(unittest.TestCase):
-    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
+class QAPipelineTests(unittest.TestCase):
+    def _test_qa_pipeline(self, nlp):
+        output_keys = {"score", "answer", "start", "end"}
+        valid_inputs = [
+            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
+            {
+                "question": "In what field is HuggingFace working ?",
+                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
+            },
+        ]
+        invalid_inputs = [
+            {"question": "", "context": "This is a test to try empty question edge case"},
+            {"question": None, "context": "This is a test to try empty question edge case"},
+            {"question": "What is does with empty context ?", "context": ""},
+            {"question": "What is does with empty context ?", "context": None},
+        ]
         self.assertIsNotNone(nlp)
 
         mono_result = nlp(valid_inputs[0])
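For reference, a hedged usage sketch of the tiny QA checkpoint the rewritten tests load; the answer text is meaningless under random weights, but the output schema is what _test_qa_pipeline asserts:

from transformers import pipeline

nlp = pipeline(
    task="question-answering",
    model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
)

result = nlp(
    {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."}
)
# _test_qa_pipeline checks exactly these keys on every result:
assert {"score", "answer", "start", "end"} <= set(result.keys())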
@@ -413,75 +342,33 @@ class MultiColumnInputTestCase(unittest.TestCase):
         for result in multi_result:
             for key in output_keys:
                 self.assertIn(key, result)
 
-        self.assertRaises(Exception, nlp, invalid_inputs[0])
+        for bad_input in invalid_inputs:
+            self.assertRaises(Exception, nlp, bad_input)
         self.assertRaises(Exception, nlp, invalid_inputs)
 
     @require_torch
-    def test_question_answering(self):
+    def test_torch_question_answering(self):
-        mandatory_output_keys = {"score", "answer", "start", "end"}
-        valid_samples = [
-            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
-            {
-                "question": "In what field is HuggingFace working ?",
-                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
-            },
-        ]
-        invalid_samples = [
-            {"question": "", "context": "This is a test to try empty question edge case"},
-            {"question": None, "context": "This is a test to try empty question edge case"},
-            {"question": "What is does with empty context ?", "context": ""},
-            {"question": "What is does with empty context ?", "context": None},
-        ]
-        for tokenizer, model, config in QA_FINETUNED_MODELS:
-            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
-            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
+        for model_name in QA_FINETUNED_MODELS:
+            nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name)
+            self._test_qa_pipeline(nlp)
 
     @require_tf
+    @slow
     def test_tf_question_answering(self):
-        mandatory_output_keys = {"score", "answer", "start", "end"}
-        valid_samples = [
-            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
-            {
-                "question": "In what field is HuggingFace working ?",
-                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
-            },
-        ]
-        invalid_samples = [
-            {"question": "", "context": "This is a test to try empty question edge case"},
-            {"question": None, "context": "This is a test to try empty question edge case"},
-            {"question": "What is does with empty context ?", "context": ""},
-            {"question": "What is does with empty context ?", "context": None},
-        ]
-        for tokenizer, model, config in TF_QA_FINETUNED_MODELS:
-            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
+        for model_name in QA_FINETUNED_MODELS:
+            nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_qa_pipeline(nlp)
 
 
 class PipelineCommonTests(unittest.TestCase):
-    pipelines = (
-        "ner",
-        "feature-extraction",
-        "question-answering",
-        "fill-mask",
-        "summarization",
-        "sentiment-analysis",
-        "translation_en_to_fr",
-        "translation_en_to_de",
-        "translation_en_to_ro",
-        "text-generation",
-    )
+    pipelines = SUPPORTED_TASKS.keys()
 
     @slow
     @require_tf
     def test_tf_defaults(self):
         # Test that pipelines can be correctly loaded without any argument
         for task in self.pipelines:
-            with self.subTest(msg="Testing Torch defaults with PyTorch and {}".format(task)):
+            with self.subTest(msg="Testing TF defaults with TF and {}".format(task)):
                 pipeline(task, framework="tf")
 
     @slow
...
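Replacing the hard-coded task tuple with SUPPORTED_TASKS.keys() means any newly registered task is smoke-tested automatically. A small sketch of what the common tests now iterate:

from transformers.pipelines import SUPPORTED_TASKS

# Every registered task name now gets a default-construction test; adding a
# task to SUPPORTED_TASKS picks it up here with no test-file change.
for task in SUPPORTED_TASKS.keys():
    print(task)  # ner, feature-extraction, question-answering, fill-mask, ...
    # The @slow tests then instantiate each default, e.g.:
    # pipeline(task, framework="tf")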