import unittest
from typing import Iterable, List, Optional

from transformers import pipeline
from transformers.pipelines import Pipeline

from .utils import require_tf, require_torch, slow

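# Unless noted otherwise below, each fixture entry is a (tokenizer, model, config) triple as
# unpacked by the tests; the tokenizer entry may itself be a (name, kwargs) pair, e.g. passing
# use_fast=False to select the slow (Python) tokenizer.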
QA_FINETUNED_MODELS = [
    (("bert-base-uncased", {"use_fast": False}), "bert-large-uncased-whole-word-masking-finetuned-squad", None),
    (("distilbert-base-cased-distilled-squad", {"use_fast": False}), "distilbert-base-cased-distilled-squad", None),
]

TF_QA_FINETUNED_MODELS = [
    (("bert-base-uncased", {"use_fast": False}), "bert-large-uncased-whole-word-masking-finetuned-squad", None),
    (("distilbert-base-cased-distilled-squad", {"use_fast": False}), "distilbert-base-cased-distilled-squad", None),
]

TF_NER_FINETUNED_MODELS = {
    (
        "bert-base-cased",
        "dbmdz/bert-large-cased-finetuned-conll03-english",
        "dbmdz/bert-large-cased-finetuned-conll03-english",
    )
}

NER_FINETUNED_MODELS = {
    (
        "bert-base-cased",
        "dbmdz/bert-large-cased-finetuned-conll03-english",
        "dbmdz/bert-large-cased-finetuned-conll03-english",
    )
}

FEATURE_EXTRACT_FINETUNED_MODELS = {
    ("bert-base-cased", "bert-base-cased", None),
    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes for TF2
    ("distilbert-base-cased", "distilbert-base-cased", None),
}

TF_FEATURE_EXTRACT_FINETUNED_MODELS = {
    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes for TF2
    ("distilbert-base-cased", "distilbert-base-cased", None),
}

TF_TEXT_CLASSIF_FINETUNED_MODELS = {
    (
        "bert-base-uncased",
        "distilbert-base-uncased-finetuned-sst-2-english",
        "distilbert-base-uncased-finetuned-sst-2-english",
    )
}

TEXT_CLASSIF_FINETUNED_MODELS = {
    (
        "distilbert-base-cased",
        "distilbert-base-uncased-finetuned-sst-2-english",
        "distilbert-base-uncased-finetuned-sst-2-english",
    )
}

FILL_MASK_FINETUNED_MODELS = [
    (("distilroberta-base", {"use_fast": False}), "distilroberta-base", None),
]

TF_FILL_MASK_FINETUNED_MODELS = [
    (("distilroberta-base", {"use_fast": False}), "distilroberta-base", None),
]

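# Summarization fixtures are (model, tokenizer) pairs, unpacked in that order by the tests below.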
SUMMARIZATION_FINETUNED_MODELS = {
    ("sshleifer/bart-tiny-random", "bart-large-cnn"),
    ("patrickvonplaten/t5-tiny-random", "t5-small"),
}
TF_SUMMARIZATION_FINETUNED_MODELS = {("patrickvonplaten/t5-tiny-random", "t5-small")}

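# Translation fixtures are (model, tokenizer, task) triples; the task string selects the language pair.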
TRANSLATION_FINETUNED_MODELS = {
    ("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_de"),
    ("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_ro"),
}
TF_TRANSLATION_FINETUNED_MODELS = {("patrickvonplaten/t5-tiny-random", "t5-small", "translation_en_to_fr")}


class MonoColumnInputTestCase(unittest.TestCase):
    def _test_mono_column_pipeline(
        self,
        nlp: Pipeline,
        valid_inputs: List,
        invalid_inputs: List,
        output_keys: Iterable[str],
        expected_multi_result: Optional[List] = None,
        expected_check_keys: Optional[List[str]] = None,
    ):
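        """Shared checks for single-input pipelines: run `nlp` on one input and on every valid
        input, assert each prediction exposes the expected output keys, optionally compare the
        values of `expected_check_keys` against `expected_multi_result`, and assert that calling
        the pipeline on `invalid_inputs` raises an exception."""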
        self.assertIsNotNone(nlp)

        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, list)
        self.assertIsInstance(mono_result[0], (dict, list))

        if isinstance(mono_result[0], list):
            mono_result = mono_result[0]

        for key in output_keys:
            self.assertIn(key, mono_result[0])

        multi_result = [nlp(input) for input in valid_inputs]
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], (dict, list))

        if expected_multi_result is not None:
            for result, expect in zip(multi_result, expected_multi_result):
                for key in expected_check_keys or []:
                    self.assertEqual(
                        set([o[key] for o in result]), set([o[key] for o in expect]),
                    )

        if isinstance(multi_result[0], list):
            multi_result = multi_result[0]

        for result in multi_result:
            for key in output_keys:
                self.assertIn(key, result)

        self.assertRaises(Exception, nlp, invalid_inputs)

    @require_torch
    def test_ner(self):
        mandatory_keys = {"entity", "word", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in NER_FINETUNED_MODELS:
            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_tf
    def test_tf_ner(self):
        mandatory_keys = {"entity", "word", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_NER_FINETUNED_MODELS:
            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer, framework="tf")
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
    def test_sentiment_analysis(self):
        mandatory_keys = {"label", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_tf
    def test_tf_sentiment_analysis(self):
        mandatory_keys = {"label", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer, framework="tf")
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
    def test_feature_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})

    @require_tf
    def test_tf_feature_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer, framework="tf")
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})

    @require_torch
    def test_fill_mask(self):
        mandatory_keys = {"sequence", "score", "token"}
        valid_inputs = [
            "My name is <mask>",
            "The largest city in France is <mask>",
        ]
        invalid_inputs = [None]
        expected_multi_result = [
            [
                {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
                {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
            ],
            [
                {
                    "sequence": "<s> The largest city in France is Paris</s>",
                    "score": 0.3185044229030609,
                    "token": 2201,
                },
                {
                    "sequence": "<s> The largest city in France is Lyon</s>",
                    "score": 0.21112334728240967,
                    "token": 12790,
                },
            ],
        ]
        for tokenizer, model, config in FILL_MASK_FINETUNED_MODELS:
            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
            self._test_mono_column_pipeline(
                nlp,
                valid_inputs,
                invalid_inputs,
                mandatory_keys,
                expected_multi_result=expected_multi_result,
                expected_check_keys=["sequence"],
            )

    @require_tf
    def test_tf_fill_mask(self):
        mandatory_keys = {"sequence", "score", "token"}
        valid_inputs = [
            "My name is <mask>",
            "The largest city in France is <mask>",
        ]
        invalid_inputs = [None]
        expected_multi_result = [
            [
                {"sequence": "<s> My name is:</s>", "score": 0.009954338893294334, "token": 35},
                {"sequence": "<s> My name is John</s>", "score": 0.0080940006300807, "token": 610},
            ],
            [
                {
                    "sequence": "<s> The largest city in France is Paris</s>",
                    "score": 0.3185044229030609,
                    "token": 2201,
                },
                {
                    "sequence": "<s> The largest city in France is Lyon</s>",
                    "score": 0.21112334728240967,
                    "token": 12790,
                },
            ],
        ]
        for tokenizer, model, config in TF_FILL_MASK_FINETUNED_MODELS:
            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, framework="tf", topk=2)
            self._test_mono_column_pipeline(
                nlp,
                valid_inputs,
                invalid_inputs,
                mandatory_keys,
                expected_multi_result=expected_multi_result,
                expected_check_keys=["sequence"],
            )

    @require_torch
    def test_summarization(self):
        valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
        invalid_inputs = [4, "<mask>"]
        mandatory_keys = ["summary_text"]
        for model, tokenizer in SUMMARIZATION_FINETUNED_MODELS:
            nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer)
            self._test_mono_column_pipeline(
                nlp, valid_inputs, invalid_inputs, mandatory_keys,
            )

    @require_tf
    def test_tf_summarization(self):
        valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
        invalid_inputs = [4, "<mask>"]
        mandatory_keys = ["summary_text"]
        for model, tokenizer in TF_SUMMARIZATION_FINETUNED_MODELS:
            nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer, framework="tf")
            self._test_mono_column_pipeline(
                nlp, valid_inputs, invalid_inputs, mandatory_keys,
            )

    @require_torch
    def test_translation(self):
        valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
        invalid_inputs = [4, "<mask>"]
        mandatory_keys = ["translation_text"]
        for model, tokenizer, task in TRANSLATION_FINETUNED_MODELS:
            nlp = pipeline(task=task, model=model, tokenizer=tokenizer)
            self._test_mono_column_pipeline(
                nlp, valid_inputs, invalid_inputs, mandatory_keys,
            )

    @require_tf
    def test_tf_translation(self):
        valid_inputs = ["A string like this", ["list of strings entry 1", "list of strings v2"]]
        invalid_inputs = [4, "<mask>"]
        mandatory_keys = ["translation_text"]
        for model, tokenizer, task in TF_TRANSLATION_FINETUNED_MODELS:
            nlp = pipeline(task=task, model=model, tokenizer=tokenizer, framework="tf")
            self._test_mono_column_pipeline(
                nlp, valid_inputs, invalid_inputs, mandatory_keys,
            )


class MultiColumnInputTestCase(unittest.TestCase):
    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
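        """Shared checks for multi-column pipelines (e.g. question answering): run `nlp` on a
        single sample and on the full list of samples, assert the expected output keys are
        present in every result, and assert that invalid samples raise an exception."""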
        self.assertIsNotNone(nlp)

        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, dict)

        for key in output_keys:
            self.assertIn(key, mono_result)

        multi_result = nlp(valid_inputs)
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], dict)

        for result in multi_result:
            for key in output_keys:
                self.assertIn(key, result)

        self.assertRaises(Exception, nlp, invalid_inputs[0])
        self.assertRaises(Exception, nlp, invalid_inputs)

    @require_torch
    def test_question_answering(self):
        mandatory_output_keys = {"score", "answer", "start", "end"}
        valid_samples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {
                "question": "In what field is HuggingFace working ?",
                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
            },
        ]
        invalid_samples = [
            {"question": "", "context": "This is a test to try empty question edge case"},
            {"question": None, "context": "This is a test to try empty question edge case"},
            {"question": "What does it do with an empty context ?", "context": ""},
            {"question": "What does it do with an empty context ?", "context": None},
        ]

        for tokenizer, model, config in QA_FINETUNED_MODELS:
            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)

    @require_tf
    @slow
    def test_tf_question_answering(self):
        mandatory_output_keys = {"score", "answer", "start", "end"}
        valid_samples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {
                "question": "In what field is HuggingFace working ?",
                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
            },
        ]
        invalid_samples = [
            {"question": "", "context": "This is a test to try empty question edge case"},
            {"question": None, "context": "This is a test to try empty question edge case"},
            {"question": "What does it do with an empty context ?", "context": ""},
            {"question": "What does it do with an empty context ?", "context": None},
        ]

        for tokenizer, model, config in TF_QA_FINETUNED_MODELS:
            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer, framework="tf")
            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)


class PipelineCommonTests(unittest.TestCase):
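    """Checks that every task listed in `pipelines` can be instantiated through `pipeline(task)`
    with its default model, config and tokenizer, for both frameworks."""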

    pipelines = (
        "ner",
        "feature-extraction",
        "question-answering",
        "fill-mask",
        "summarization",
        "sentiment-analysis",
        "translation_en_to_fr",
        "translation_en_to_de",
        "translation_en_to_ro",
    )

    @slow
    @require_tf
    def test_tf_defaults(self):
        # Test that pipelines can be correctly loaded without any argument
        for task in self.pipelines:
            with self.subTest(msg="Testing TensorFlow defaults with {}".format(task)):
                pipeline(task, framework="tf")

    @slow
    @require_torch
    def test_pt_defaults(self):
        # Test that pipelines can be correctly loaded without any argument
        for task in self.pipelines:
            with self.subTest(msg="Testing PyTorch defaults with {}".format(task)):
                pipeline(task, framework="pt")