"megatron/vscode:/vscode.git/clone" did not exist on "76928caa3331f97e0f48cb921d725604e22907ff"
pipelines_test.py 9.25 KB
Newer Older
1
import unittest
from typing import Iterable

from transformers import pipeline
from transformers.tests.utils import require_tf, require_torch


QA_FINETUNED_MODELS = {
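    # Each entry is a (tokenizer, model, config) triple; a None config lets pipeline() fall back to the model's own configuration.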
    ("bert-base-uncased", "bert-large-uncased-whole-word-masking-finetuned-squad", None),
    ("bert-base-cased", "bert-large-cased-whole-word-masking-finetuned-squad", None),
    ("bert-base-uncased", "distilbert-base-uncased-distilled-squad", None),
}

TF_QA_FINETUNED_MODELS = {
    ("bert-base-uncased", "bert-large-uncased-whole-word-masking-finetuned-squad", None),
    ("bert-base-cased", "bert-large-cased-whole-word-masking-finetuned-squad", None),
    ("bert-base-uncased", "distilbert-base-uncased-distilled-squad", None),
}

TF_NER_FINETUNED_MODELS = {
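    # Remote checkpoints are given as (tokenizer name, URL to the weights, URL to the config).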
    (
        "bert-base-cased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-tf_model.h5",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json",
    )
}

NER_FINETUNED_MODELS = {
    (
        "bert-base-cased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json",
    )
}

FEATURE_EXTRACT_FINETUNED_MODELS = {
    ("bert-base-cased", "bert-base-cased", None),
    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes with TF2
    ("distilbert-base-uncased", "distilbert-base-uncased", None),
}

TF_FEATURE_EXTRACT_FINETUNED_MODELS = {
    ("bert-base-cased", "bert-base-cased", None),
    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes with TF2
    ("distilbert-base-uncased", "distilbert-base-uncased", None),
}

TF_TEXT_CLASSIF_FINETUNED_MODELS = {
    (
        "bert-base-uncased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json",
    )
}

TEXT_CLASSIF_FINETUNED_MODELS = {
    (
        "bert-base-uncased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json",
    )
}


class MonoColumnInputTestCase(unittest.TestCase):
    def _test_mono_column_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
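        """Run `nlp` on a single input and on a batch of inputs, check that every result
        exposes the keys in `output_keys`, and that `invalid_inputs` raises an exception."""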
        self.assertIsNotNone(nlp)

        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, list)
        self.assertIsInstance(mono_result[0], (dict, list))

        if isinstance(mono_result[0], list):
            mono_result = mono_result[0]

        for key in output_keys:
            self.assertIn(key, mono_result[0])

        multi_result = nlp(valid_inputs)
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], (dict, list))

        if isinstance(multi_result[0], list):
            multi_result = multi_result[0]

        for result in multi_result:
            for key in output_keys:
                self.assertIn(key, result)

        self.assertRaises(Exception, nlp, invalid_inputs)

    @require_torch
    def test_ner(self):
        mandatory_keys = {"entity", "word", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in NER_FINETUNED_MODELS:
            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_tf
    def test_tf_ner(self):
        mandatory_keys = {"entity", "word", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_NER_FINETUNED_MODELS:
            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
    def test_sentiment_analysis(self):
        mandatory_keys = {"label"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_tf
    def test_tf_sentiment_analysis(self):
        mandatory_keys = {"label"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
    def test_features_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})

    @require_tf
    def test_tf_features_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})


class MultiColumnInputTestCase(unittest.TestCase):
    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
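        """Run `nlp` on a single multi-column sample and on a batch of samples, check that
        every result exposes the keys in `output_keys`, and that invalid samples raise."""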
        self.assertIsNotNone(nlp)

        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, dict)

        for key in output_keys:
            self.assertIn(key, mono_result)

        multi_result = nlp(valid_inputs)
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], dict)

        for result in multi_result:
            for key in output_keys:
                self.assertIn(key, result)

        self.assertRaises(Exception, nlp, invalid_inputs[0])
        self.assertRaises(Exception, nlp, invalid_inputs)

    @require_torch
    def test_question_answering(self):
        mandatory_output_keys = {"score", "answer", "start", "end"}
        valid_samples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {
                "question": "In what field is HuggingFace working ?",
                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
            },
        ]
        invalid_samples = [
            {"question": "", "context": "This is a test to try empty question edge case"},
            {"question": None, "context": "This is a test to try empty question edge case"},
            {"question": "What is does with empty context ?", "context": ""},
            {"question": "What is does with empty context ?", "context": None},
        ]

        for tokenizer, model, config in QA_FINETUNED_MODELS:
            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)

    @require_tf
    def test_tf_question_answering(self):
        mandatory_output_keys = {"score", "answer", "start", "end"}
        valid_samples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {
                "question": "In what field is HuggingFace working ?",
                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
            },
        ]
        invalid_samples = [
            {"question": "", "context": "This is a test to try empty question edge case"},
            {"question": None, "context": "This is a test to try empty question edge case"},
            {"question": "What is does with empty context ?", "context": ""},
            {"question": "What is does with empty context ?", "context": None},
        ]

        for tokenizer, model, config in TF_QA_FINETUNED_MODELS:
            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)


if __name__ == "__main__":
    unittest.main()