Unverified commit a6e36558, authored by Nicolas Patry and committed by GitHub

Moving `text-generation` pipeline to new testing framework. (#13285)

* Moving `text-generation` pipeline to new testing framework.

* Keep check_model_type but log instead of raise Exception.

* warning -> error.
parent 0759f251
src/transformers/pipelines/base.py
@@ -735,10 +735,8 @@ class Pipeline(_ScikitCompat):
                 supported_models_names.append(model.__name__)
             supported_models = supported_models_names
         if self.model.__class__.__name__ not in supported_models:
-            raise PipelineException(
-                self.task,
-                self.model.base_model_prefix,
-                f"The model '{self.model.__class__.__name__}' is not supported for {self.task}. Supported models are {supported_models}",
-            )
+            logger.error(
+                f"The model '{self.model.__class__.__name__}' is not supported for {self.task}. Supported models are {supported_models}."
+            )

     def _parse_and_tokenize(
...
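Note: a minimal runnable sketch of the caller-visible change in this hunk. The names below (`toy_check_model_type`, the stand-in `GPT2LMHeadModel`) are invented for illustration and are not part of the library; only the logging-instead-of-raising logic mirrors the diff.

import logging

logger = logging.getLogger("pipeline_check_sketch")
logging.basicConfig(level=logging.ERROR)


def toy_check_model_type(model_class_name, task, supported_models):
    # As in the hunk above, a mapping {config_class: model_class} is first
    # normalized to a list of model class names.
    if not isinstance(supported_models, list):
        supported_models = [model.__name__ for model in supported_models.values()]
    if model_class_name not in supported_models:
        # Since this commit: log an error instead of raising PipelineException,
        # so the pipeline is still constructed for unsupported architectures.
        logger.error(
            f"The model '{model_class_name}' is not supported for {task}. "
            f"Supported models are {supported_models}."
        )


class GPT2LMHeadModel:  # stand-in class, for illustration only
    pass


toy_check_model_type("BertForMaskedLM", "text-generation", {"gpt2-config": GPT2LMHeadModel})
# logs: The model 'BertForMaskedLM' is not supported for text-generation. ...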
src/transformers/pipelines/text_generation.py
+from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING
+
 from ..file_utils import add_end_docstrings
 from .base import PIPELINE_INIT_ARGS, Pipeline
...
@@ -30,25 +32,12 @@ class TextGenerationPipeline(Pipeline):
     begging for his blessing. <eod> </s> <eos>
     """

-    ALLOWED_MODELS = [
-        "XLNetLMHeadModel",
-        "TransfoXLLMHeadModel",
-        "ReformerModelWithLMHead",
-        "GPT2LMHeadModel",
-        "GPTNeoForCausalLM",
-        "OpenAIGPTLMHeadModel",
-        "CTRLLMHeadModel",
-        "TFXLNetLMHeadModel",
-        "TFTransfoXLLMHeadModel",
-        "TFGPT2LMHeadModel",
-        "TFOpenAIGPTLMHeadModel",
-        "TFCTRLLMHeadModel",
-    ]
-
     def __init__(self, *args, return_full_text=True, **kwargs):
         super().__init__(*args, **kwargs)
-        self.check_model_type(self.ALLOWED_MODELS)
+        self.check_model_type(
+            TF_MODEL_FOR_CAUSAL_LM_MAPPING if self.framework == "tf" else MODEL_FOR_CAUSAL_LM_MAPPING
+        )
         self.return_full_text = return_full_text

     # overriding _parse_and_tokenize to allow for unusual language-modeling tokenizer arguments
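The hard-coded ALLOWED_MODELS list is gone; the framework-appropriate auto-mapping now drives check_model_type. A hedged sketch of why the mapping subsumes the old list (assumes PyTorch is installed; exact mapping contents vary by transformers version):

from transformers import MODEL_FOR_CAUSAL_LM_MAPPING

# The auto-mapping pairs each config class with its causal-LM model class, so
# the supported class names can be derived rather than maintained by hand.
supported = {model.__name__ for model in MODEL_FOR_CAUSAL_LM_MAPPING.values()}
print("GPT2LMHeadModel" in supported)    # True - previously listed in ALLOWED_MODELS
print("GPTNeoForCausalLM" in supported)  # True - new architectures appear automatically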
@@ -124,6 +113,9 @@ class TextGenerationPipeline(Pipeline):
             prefix_length = prefix_inputs["input_ids"].shape[-1]
             if generate_kwargs.get("max_length", None) is not None:
                 generate_kwargs["max_length"] += prefix_length
+            else:
+                generate_kwargs["max_length"] = self.model.config.max_length + prefix_length

             if generate_kwargs.get("min_length", None) is not None:
                 generate_kwargs["min_length"] += prefix_length
...
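A worked illustration of the length bookkeeping in this hunk (numbers invented for the example): the prompt prefix is prepended before generation, so any length budget must be shifted by the prefix length; the new else branch applies the same shift when the caller relies on the model's default.

# Suppose the prefix tokenizes to 5 tokens and the caller asked for max_length=20.
prefix_length = 5
generate_kwargs = {"max_length": 20}

if generate_kwargs.get("max_length", None) is not None:
    # The caller's budget counts from their own prompt, so the prefix tokens
    # must be added on top: 20 + 5 = 25 is what generate() should receive.
    generate_kwargs["max_length"] += prefix_length
else:
    # New in this diff: no explicit max_length means "use the model default";
    # 20 below stands in for self.model.config.max_length.
    generate_kwargs["max_length"] = 20 + prefix_length

assert generate_kwargs["max_length"] == 25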
tests/test_pipelines_text_generation.py
@@ -14,49 +14,95 @@
 import unittest

-from transformers import pipeline
-from transformers.testing_utils import require_torch
+from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING, TextGenerationPipeline, pipeline
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch

-from .test_pipelines_common import MonoInputPipelineCommonMixin
+from .test_pipelines_common import ANY, PipelineTestCaseMeta


-class TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
-    pipeline_task = "text-generation"
-    pipeline_running_kwargs = {"prefix": "This is "}
-    small_models = ["sshleifer/tiny-ctrl"]  # Models tested without the @slow decorator
-    large_models = []  # Models tested with the @slow decorator
-
-    def test_simple_generation(self):
-        text_generator = pipeline(task="text-generation", model=self.small_models[0])
-        # text-generation is non-deterministic by nature, we can't fully test the output
-
-        outputs = text_generator("This is a test")
-        self.assertEqual(len(outputs), 1)
-        self.assertEqual(list(outputs[0].keys()), ["generated_text"])
-        self.assertEqual(type(outputs[0]["generated_text"]), str)
-
-        outputs = text_generator(["This is a test", "This is a second test"])
-        self.assertEqual(len(outputs[0]), 1)
-        self.assertEqual(list(outputs[0][0].keys()), ["generated_text"])
-        self.assertEqual(type(outputs[0][0]["generated_text"]), str)
-        self.assertEqual(list(outputs[1][0].keys()), ["generated_text"])
-        self.assertEqual(type(outputs[1][0]["generated_text"]), str)
-
-    @require_torch
-    def test_generation_output_style(self):
-        text_generator = pipeline(task="text-generation", model=self.small_models[0])
-        # text-generation is non-deterministic by nature, we can't fully test the output
-
-        outputs = text_generator("This is a test")
-        self.assertIn("This is a test", outputs[0]["generated_text"])
-
-        outputs = text_generator("This is a test", return_full_text=False)
-        self.assertNotIn("This is a test", outputs[0]["generated_text"])
-
-        text_generator = pipeline(task="text-generation", model=self.small_models[0], return_full_text=False)
-        outputs = text_generator("This is a test")
-        self.assertNotIn("This is a test", outputs[0]["generated_text"])
-
-        outputs = text_generator("This is a test", return_full_text=True)
-        self.assertIn("This is a test", outputs[0]["generated_text"])
+@is_pipeline_test
+class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_CAUSAL_LM_MAPPING
+
+    @require_torch
+    def test_small_model_pt(self):
+        text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="pt")
+        # Using `do_sample=False` to force deterministic output
+        outputs = text_generator("This is a test", do_sample=False)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                }
+            ],
+        )
+
+        outputs = text_generator(["This is a test", "This is a second test"])
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {
+                        "generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                    }
+                ],
+                [
+                    {
+                        "generated_text": "This is a second test ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                    }
+                ],
+            ],
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="tf")
+
+        # Using `do_sample=False` to force deterministic output
+        outputs = text_generator("This is a test", do_sample=False)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                }
+            ],
+        )
+
+        outputs = text_generator(["This is a test", "This is a second test"], do_sample=False)
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {
+                        "generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                    }
+                ],
+                [
+                    {
+                        "generated_text": "This is a second test Chieftain Chieftain prefecture prefecture prefecture Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                    }
+                ],
+            ],
+        )
+
+    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
+
+        outputs = text_generator("This is a test")
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
+
+        outputs = text_generator("This is a test", return_full_text=False)
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertNotIn("This is a test", outputs[0]["generated_text"])
+
+        text_generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False)
+        outputs = text_generator("This is a test")
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertNotIn("This is a test", outputs[0]["generated_text"])
+
+        outputs = text_generator("This is a test", return_full_text=True)
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
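The rewritten tests assert against ANY(str) from test_pipelines_common, while PipelineTestCaseMeta generates run_pipeline_test invocations for the architectures in model_mapping/tf_model_mapping. Roughly, ANY is an equality wildcard; the class below is a hedged re-implementation for illustration, not the exact helper from the repo.

class ANY:
    # Compares equal to any object of the wrapped type(s), letting assertEqual
    # check output structure while leaving the generated text unconstrained.
    def __init__(self, *expected_types):
        self.expected_types = expected_types

    def __eq__(self, other):
        return isinstance(other, self.expected_types)

    def __repr__(self):
        return f"ANY({', '.join(t.__name__ for t in self.expected_types)})"


assert [{"generated_text": "hello world"}] == [{"generated_text": ANY(str)}]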