"...kernels/git@developer.sourcefind.cn:change/sglang.git" did not exist on "9cf407729441a18561ee5a8b2c1e3e535ba817f9"
Unverified Commit 7822cd38 authored by Sam Shleifer, committed by GitHub

[tests] make pipelines tests faster with smaller models (#4238)

Covers torch and tf. Also fixes a failing @slow test.
parent 448c4672
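The gist of the change: the pipeline tests previously downloaded full-size finetuned checkpoints; they now load tiny, randomly initialized stand-ins with the same architecture, so the pipeline code paths are exercised in seconds. A minimal sketch of the difference, using model names from this diff (the tiny model's predictions are meaningless; only the mechanics are under test):

```python
from transformers import pipeline

# Old fixture: a full finetuned BERT-large checkpoint (on the order of a gigabyte).
# nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

# New fixture: a tiny random checkpoint with the same architecture/config layout.
model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
nlp = pipeline("ner", model=model_name, tokenizer=model_name)
print(nlp("HuggingFace is based in New-York & Paris"))  # each entity dict has: entity, word, score
```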
@@ -1513,7 +1513,7 @@ class TranslationPipeline(Pipeline):
         return results
 
-# Register all the supported task here
+# Register all the supported tasks here
 SUPPORTED_TASKS = {
     "feature-extraction": {
         "impl": FeatureExtractionPipeline,
@@ -1576,9 +1576,9 @@ SUPPORTED_TASKS = {
         "tf": TFAutoModelWithLMHead if is_tf_available() else None,
         "pt": AutoModelWithLMHead if is_torch_available() else None,
         "default": {
-            "model": {"pt": "bart-large-cnn", "tf": None},
+            "model": {"pt": "bart-large-cnn", "tf": "t5-small"},
            "config": None,
-            "tokenizer": ("bart-large-cnn", {"use_fast": False}),
+            "tokenizer": {"pt": ("bart-large-cnn", {"use_fast": False}), "tf": "t5-small"},
         },
     },
     "translation_en_to_fr": {
...
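For context on this hunk: the `default` entry is what `pipeline()` falls back to when no model is passed. The TF branch of the summarization default used to be `None`, so loading the default summarizer under TF failed; it now resolves to `t5-small`, with a matching framework-dependent tokenizer default. A sketch of the resulting behavior, assuming the transformers API at this commit:

```python
from transformers import pipeline

# No model given: pipeline() reads SUPPORTED_TASKS["summarization"]["default"].
# PyTorch still resolves to bart-large-cnn; TF now resolves to t5-small
# instead of failing on a None default.
summarizer = pipeline("summarization", framework="tf")
print(summarizer("New York City is the most populous city in the United States."))
```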
@@ -2,93 +2,40 @@ import unittest
 from typing import Iterable, List, Optional
 
 from transformers import pipeline
-from transformers.pipelines import DefaultArgumentHandler, Pipeline
+from transformers.pipelines import SUPPORTED_TASKS, DefaultArgumentHandler, Pipeline
 
 from .utils import require_tf, require_torch, slow
 
-QA_FINETUNED_MODELS = [
-    (("bert-base-uncased", {"use_fast": False}), "bert-large-uncased-whole-word-masking-finetuned-squad", None),
-    (("distilbert-base-cased-distilled-squad", {"use_fast": False}), "distilbert-base-cased-distilled-squad", None),
-]
-TF_QA_FINETUNED_MODELS = [
-    (("bert-base-uncased", {"use_fast": False}), "bert-large-uncased-whole-word-masking-finetuned-squad", None),
-    (("distilbert-base-cased-distilled-squad", {"use_fast": False}), "distilbert-base-cased-distilled-squad", None),
-]
-TF_NER_FINETUNED_MODELS = {
-    (
-        "bert-base-cased",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-    )
-}
-NER_FINETUNED_MODELS = {
-    (
-        "bert-base-cased",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-        "dbmdz/bert-large-cased-finetuned-conll03-english",
-    )
-}
-FEATURE_EXTRACT_FINETUNED_MODELS = {
-    ("bert-base-cased", "bert-base-cased", None),
-    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crash for TF2
-    ("distilbert-base-cased", "distilbert-base-cased", None),
-}
-TF_FEATURE_EXTRACT_FINETUNED_MODELS = {
-    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crash for TF2
-    ("distilbert-base-cased", "distilbert-base-cased", None),
-}
-TF_TEXT_CLASSIF_FINETUNED_MODELS = {
-    (
-        "bert-base-uncased",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-    )
-}
-TEXT_CLASSIF_FINETUNED_MODELS = {
-    (
-        "distilbert-base-cased",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-        "distilbert-base-uncased-finetuned-sst-2-english",
-    )
-}
-TEXT_GENERATION_FINETUNED_MODELS = {
-    ("gpt2", "gpt2"),
-    ("xlnet-base-cased", "xlnet-base-cased"),
-}
-TF_TEXT_GENERATION_FINETUNED_MODELS = {
-    ("gpt2", "gpt2"),
-    ("xlnet-base-cased", "xlnet-base-cased"),
-}
-FILL_MASK_FINETUNED_MODELS = [
-    (("distilroberta-base", {"use_fast": False}), "distilroberta-base", None),
-]
-TF_FILL_MASK_FINETUNED_MODELS = [
-    (("distilroberta-base", {"use_fast": False}), "distilroberta-base", None),
-]
-SUMMARIZATION_FINETUNED_MODELS = {
-    ("sshleifer/bart-tiny-random", "bart-large-cnn"),
-    ("patrickvonplaten/t5-tiny-random", "t5-small"),
-}
-TF_SUMMARIZATION_FINETUNED_MODELS = {("patrickvonplaten/t5-tiny-random", "t5-small")}
-TRANSLATION_FINETUNED_MODELS = {
-    ("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_de"),
-    ("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_ro"),
-}
-TF_TRANSLATION_FINETUNED_MODELS = {("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_fr")}
+NER_FINETUNED_MODELS = ["sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"]
+
+# xlnet-base-cased disabled for now, since it crashes TF2
+FEATURE_EXTRACT_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-cased"]
+TEXT_CLASSIF_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"]
+TEXT_GENERATION_FINETUNED_MODELS = ["sshleifer/tiny-ctrl"]
+FILL_MASK_FINETUNED_MODELS = ["sshleifer/tiny-distilroberta-base"]
+LARGE_FILL_MASK_FINETUNED_MODELS = ["distilroberta-base"]  # @slow
+SUMMARIZATION_FINETUNED_MODELS = ["sshleifer/bart-tiny-random", "patrickvonplaten/t5-tiny-random"]
+TF_SUMMARIZATION_FINETUNED_MODELS = ["patrickvonplaten/t5-tiny-random"]
+TRANSLATION_FINETUNED_MODELS = [
+    ("patrickvonplaten/t5-tiny-random", "translation_en_to_de"),
+    ("patrickvonplaten/t5-tiny-random", "translation_en_to_ro"),
+]
+TF_TRANSLATION_FINETUNED_MODELS = [("patrickvonplaten/t5-tiny-random", "translation_en_to_fr")]
+
+expected_fill_mask_result = [
+    [
+        {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
+        {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
+    ],
+    [
+        {"sequence": "<s> The largest city in France is Paris</s>", "score": 0.3185044229030609, "token": 2201},
+        {"sequence": "<s> The largest city in France is Lyon</s>", "score": 0.21112334728240967, "token": 12790},
+    ],
+]
 
 
 class DefaultArgumentHandlerTestCase(unittest.TestCase):
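The sshleifer/tiny-* and *-tiny-random checkpoints above weigh a few hundred kilobytes: same model classes and config schema, but tiny dimensions and random weights. A hypothetical sketch of how such a fixture can be built (illustrative dimensions; not the exact recipe behind the published checkpoints):

```python
from transformers import DistilBertConfig, DistilBertModel, DistilBertTokenizer

# Shrink every dimension; the weights stay randomly initialized.
config = DistilBertConfig(n_layers=2, n_heads=2, dim=32, hidden_dim=37)
model = DistilBertModel(config)

# Reuse the real tokenizer so inputs tokenize exactly as they would in production.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")

model.save_pretrained("./tiny-distilbert-base-cased")      # a few hundred KB on disk
tokenizer.save_pretrained("./tiny-distilbert-base-cased")  # vocab + tokenizer config
```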
@@ -168,8 +115,8 @@ class MonoColumnInputTestCase(unittest.TestCase):
         self,
         nlp: Pipeline,
         valid_inputs: List,
-        invalid_inputs: List,
         output_keys: Iterable[str],
+        invalid_inputs: List = [None],
         expected_multi_result: Optional[List] = None,
         expected_check_keys: Optional[List[str]] = None,
     ):
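A side note on the new signature: `invalid_inputs: List = [None]` is a mutable default argument, evaluated once at function definition. That is safe here because the helper only iterates the list, but the pattern bites as soon as a callee mutates it; a standalone illustration of the pitfall (hypothetical function, not from this diff):

```python
def collect(item, bucket=[]):  # the default list is created once and shared
    bucket.append(item)
    return bucket

print(collect(1))  # [1]
print(collect(2))  # [1, 2]  <- state from the first call leaks into the second

def collect_safe(item, bucket=None):  # the conventional fix: default to None
    bucket = [] if bucket is None else bucket
    bucket.append(item)
    return bucket
```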
@@ -206,198 +153,180 @@ class MonoColumnInputTestCase(unittest.TestCase):
         self.assertRaises(Exception, nlp, invalid_inputs)
 
     @require_torch
-    def test_ner(self):
+    def test_torch_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in NER_FINETUNED_MODELS:
+            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_tf
     def test_tf_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TF_NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in NER_FINETUNED_MODELS:
+            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_torch
-    def test_sentiment_analysis(self):
+    def test_torch_sentiment_analysis(self):
         mandatory_keys = {"label", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
+            nlp = pipeline(task="sentiment-analysis", model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_tf
     def test_tf_sentiment_analysis(self):
         mandatory_keys = {"label", "score"}
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
+            nlp = pipeline(task="sentiment-analysis", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys)
 
     @require_torch
-    def test_feature_extraction(self):
+    def test_torch_feature_extraction(self):
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+        for model_name in FEATURE_EXTRACT_FINETUNED_MODELS:
+            nlp = pipeline(task="feature-extraction", model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
 
     @require_tf
     def test_tf_feature_extraction(self):
         valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
-        invalid_inputs = [None]
-        for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+        for model_name in FEATURE_EXTRACT_FINETUNED_MODELS:
+            nlp = pipeline(task="feature-extraction", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
 
     @require_torch
-    def test_fill_mask(self):
+    def test_torch_fill_mask(self):
+        mandatory_keys = {"sequence", "score", "token"}
+        valid_inputs = [
+            "My name is <mask>",
+            "The largest city in France is <mask>",
+        ]
+        for model_name in FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt", topk=2,)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, expected_check_keys=["sequence"])
+
+    @require_tf
+    def test_tf_fill_mask(self):
         mandatory_keys = {"sequence", "score", "token"}
         valid_inputs = [
             "My name is <mask>",
             "The largest city in France is <mask>",
         ]
-        invalid_inputs = [None]
-        expected_multi_result = [
-            [
-                {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
-                {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
-            ],
-            [
-                {
-                    "sequence": "<s> The largest city in France is Paris</s>",
-                    "score": 0.3185044229030609,
-                    "token": 2201,
-                },
-                {
-                    "sequence": "<s> The largest city in France is Lyon</s>",
-                    "score": 0.21112334728240967,
-                    "token": 12790,
-                },
-            ],
+        for model_name in FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", topk=2,)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, expected_check_keys=["sequence"])
+
+    @require_torch
+    @slow
+    def test_torch_fill_mask_results(self):
+        mandatory_keys = {"sequence", "score", "token"}
+        valid_inputs = [
+            "My name is <mask>",
+            "The largest city in France is <mask>",
         ]
-        for tokenizer, model, config in FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
+        for model_name in LARGE_FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt", topk=2,)
             self._test_mono_column_pipeline(
                 nlp,
                 valid_inputs,
-                invalid_inputs,
                 mandatory_keys,
-                expected_multi_result=expected_multi_result,
+                expected_multi_result=expected_fill_mask_result,
                 expected_check_keys=["sequence"],
             )
 
     @require_tf
-    def test_tf_fill_mask(self):
+    @slow
+    def test_tf_fill_mask_results(self):
         mandatory_keys = {"sequence", "score", "token"}
         valid_inputs = [
             "My name is <mask>",
             "The largest city in France is <mask>",
         ]
-        invalid_inputs = [None]
-        expected_multi_result = [
-            [
-                {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
-                {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
-            ],
-            [
-                {
-                    "sequence": "<s> The largest city in France is Paris</s>",
-                    "score": 0.3185044229030609,
-                    "token": 2201,
-                },
-                {
-                    "sequence": "<s> The largest city in France is Lyon</s>",
-                    "score": 0.21112334728240967,
-                    "token": 12790,
-                },
-            ],
-        ]
-        for tokenizer, model, config in TF_FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, framework="tf", topk=2)
+        for model_name in LARGE_FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", topk=2)
             self._test_mono_column_pipeline(
                 nlp,
                 valid_inputs,
-                invalid_inputs,
                 mandatory_keys,
-                expected_multi_result=expected_multi_result,
+                expected_multi_result=expected_fill_mask_result,
                 expected_check_keys=["sequence"],
             )
 
     @require_torch
-    def test_summarization(self):
+    def test_torch_summarization(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["summary_text"]
-        for model, tokenizer in SUMMARIZATION_FINETUNED_MODELS:
-            nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model in SUMMARIZATION_FINETUNED_MODELS:
+            nlp = pipeline(task="summarization", model=model, tokenizer=model)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs=invalid_inputs)
 
     @require_tf
     def test_tf_summarization(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["summary_text"]
-        for model, tokenizer in TF_SUMMARIZATION_FINETUNED_MODELS:
-            nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model_name in TF_SUMMARIZATION_FINETUNED_MODELS:
+            nlp = pipeline(task="summarization", model=model_name, tokenizer=model_name, framework="tf",)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs=invalid_inputs)
 
     @require_torch
-    def test_translation(self):
+    def test_torch_translation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["translation_text"]
-        for model, tokenizer, task in TRANSLATION_FINETUNED_MODELS:
-            nlp = pipeline(task=task, model=model, tokenizer=tokenizer)
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model_name, task in TRANSLATION_FINETUNED_MODELS:
+            nlp = pipeline(task=task, model=model_name, tokenizer=model_name)
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs)
 
     @require_tf
+    @slow
     def test_tf_translation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
         invalid_inputs = [4, "<mask>"]
         mandatory_keys = ["translation_text"]
-        for model, tokenizer, task in TF_TRANSLATION_FINETUNED_MODELS:
-            nlp = pipeline(task=task, model=model, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, mandatory_keys,
-            )
+        for model, task in TF_TRANSLATION_FINETUNED_MODELS:
+            nlp = pipeline(task=task, model=model, tokenizer=model, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, mandatory_keys, invalid_inputs=invalid_inputs)
 
     @require_torch
-    def test_text_generation(self):
+    def test_torch_text_generation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
-        invalid_inputs = [None]
-        for model, tokenizer in TEXT_GENERATION_FINETUNED_MODELS:
-            nlp = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="pt")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, {},
-            )
+        for model_name in TEXT_GENERATION_FINETUNED_MODELS:
+            nlp = pipeline(task="text-generation", model=model_name, tokenizer=model_name, framework="pt")
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
 
     @require_tf
     def test_tf_text_generation(self):
         valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
-        invalid_inputs = [None]
-        for model, tokenizer in TF_TEXT_GENERATION_FINETUNED_MODELS:
-            nlp = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="tf")
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, invalid_inputs, {},
-            )
+        for model_name in TEXT_GENERATION_FINETUNED_MODELS:
+            nlp = pipeline(task="text-generation", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_mono_column_pipeline(nlp, valid_inputs, {})
+
+
+QA_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-cased-distilled-squad"]
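The torch_/tf_ renaming also makes it easy to run one framework's pipeline tests in isolation, e.g. `pytest tests/test_pipelines.py -k torch` (test-file path assumed from this PR).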
-class MultiColumnInputTestCase(unittest.TestCase):
-    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
+class QAPipelineTests(unittest.TestCase):
+    def _test_qa_pipeline(self, nlp):
+        output_keys = {"score", "answer", "start", "end"}
+        valid_inputs = [
+            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
+            {
+                "question": "In what field is HuggingFace working ?",
+                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
+            },
+        ]
+        invalid_inputs = [
+            {"question": "", "context": "This is a test to try empty question edge case"},
+            {"question": None, "context": "This is a test to try empty question edge case"},
+            {"question": "What is does with empty context ?", "context": ""},
+            {"question": "What is does with empty context ?", "context": None},
+        ]
         self.assertIsNotNone(nlp)
 
         mono_result = nlp(valid_inputs[0])
@@ -413,75 +342,33 @@ class MultiColumnInputTestCase(unittest.TestCase):
         for result in multi_result:
             for key in output_keys:
                 self.assertIn(key, result)
 
-        self.assertRaises(Exception, nlp, invalid_inputs[0])
+        for bad_input in invalid_inputs:
+            self.assertRaises(Exception, nlp, bad_input)
         self.assertRaises(Exception, nlp, invalid_inputs)
 
     @require_torch
-    def test_question_answering(self):
-        mandatory_output_keys = {"score", "answer", "start", "end"}
-        valid_samples = [
-            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
-            {
-                "question": "In what field is HuggingFace working ?",
-                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
-            },
-        ]
-        invalid_samples = [
-            {"question": "", "context": "This is a test to try empty question edge case"},
-            {"question": None, "context": "This is a test to try empty question edge case"},
-            {"question": "What is does with empty context ?", "context": ""},
-            {"question": "What is does with empty context ?", "context": None},
-        ]
-        for tokenizer, model, config in QA_FINETUNED_MODELS:
-            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
-            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
+    def test_torch_question_answering(self):
+        for model_name in QA_FINETUNED_MODELS:
+            nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name)
+            self._test_qa_pipeline(nlp)
 
     @require_tf
+    @slow
     def test_tf_question_answering(self):
-        mandatory_output_keys = {"score", "answer", "start", "end"}
-        valid_samples = [
-            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
-            {
-                "question": "In what field is HuggingFace working ?",
-                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
-            },
-        ]
-        invalid_samples = [
-            {"question": "", "context": "This is a test to try empty question edge case"},
-            {"question": None, "context": "This is a test to try empty question edge case"},
-            {"question": "What is does with empty context ?", "context": ""},
-            {"question": "What is does with empty context ?", "context": None},
-        ]
-        for tokenizer, model, config in TF_QA_FINETUNED_MODELS:
-            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer, framework="tf")
-            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
+        for model_name in QA_FINETUNED_MODELS:
+            nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name, framework="tf")
+            self._test_qa_pipeline(nlp)
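Usage sketch for the consolidated QA fixture: the pipeline takes question/context dicts and must return the four keys asserted above. With the tiny checkpoint the answers are random, so only the output shape is meaningful:

```python
from transformers import pipeline

model_name = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name)

result = nlp({"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."})
print(sorted(result))  # ['answer', 'end', 'score', 'start']

# An empty or None question/context is expected to raise, as the tests assert.
```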
 class PipelineCommonTests(unittest.TestCase):
-    pipelines = (
-        "ner",
-        "feature-extraction",
-        "question-answering",
-        "fill-mask",
-        "summarization",
-        "sentiment-analysis",
-        "translation_en_to_fr",
-        "translation_en_to_de",
-        "translation_en_to_ro",
-        "text-generation",
-    )
+    pipelines = SUPPORTED_TASKS.keys()
 
     @slow
     @require_tf
     def test_tf_defaults(self):
         # Test that pipelines can be correctly loaded without any argument
         for task in self.pipelines:
-            with self.subTest(msg="Testing Torch defaults with PyTorch and {}".format(task)):
+            with self.subTest(msg="Testing TF defaults with TF and {}".format(task)):
                 pipeline(task, framework="tf")
 
     @slow
...