"pytorch_pretrained_bert/tests/tokenization_test.py" did not exist on "34bdb7f9cb82593a64a7e94bf26325f3ba35f0d8"
test_pipelines.py 9.19 KB
Newer Older
1
import unittest
from typing import Iterable

from transformers import pipeline

from .utils import require_tf, require_torch


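# Each *_FINETUNED_MODELS set below holds (tokenizer, model, config) triples that are unpacked
# into `pipeline(task=..., model=model, config=config, tokenizer=tokenizer)`. Entries are either
# model identifiers or direct S3 URLs to the weights/config files; a `None` config lets
# `pipeline()` fall back to the configuration associated with the model.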
QA_FINETUNED_MODELS = {
    ("bert-base-uncased", "bert-large-uncased-whole-word-masking-finetuned-squad", None),
    ("bert-base-cased", "bert-large-cased-whole-word-masking-finetuned-squad", None),
    ("bert-base-uncased", "distilbert-base-uncased-distilled-squad", None),
}

TF_QA_FINETUNED_MODELS = {
    ("bert-base-uncased", "bert-large-uncased-whole-word-masking-finetuned-squad", None),
    ("bert-base-cased", "bert-large-cased-whole-word-masking-finetuned-squad", None),
    ("bert-base-uncased", "distilbert-base-uncased-distilled-squad", None),
}

TF_NER_FINETUNED_MODELS = {
    (
        "bert-base-cased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-tf_model.h5",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json",
    )
}

NER_FINETUNED_MODELS = {
    (
        "bert-base-cased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin",
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json",
    )
}

FEATURE_EXTRACT_FINETUNED_MODELS = {
    ("bert-base-cased", "bert-base-cased", None),
    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes with TF2
    ("distilbert-base-uncased", "distilbert-base-uncased", None),
}

TF_FEATURE_EXTRACT_FINETUNED_MODELS = {
    ("bert-base-cased", "bert-base-cased", None),
    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes with TF2
    ("distilbert-base-uncased", "distilbert-base-uncased", None),
}

TF_TEXT_CLASSIF_FINETUNED_MODELS = {
    (
        "bert-base-uncased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json",
    )
}

TEXT_CLASSIF_FINETUNED_MODELS = {
    (
        "bert-base-uncased",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin",
        "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json",
    )
}


class MonoColumnInputTestCase(unittest.TestCase):
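    """Tests for pipelines whose samples are plain strings (or lists of strings): NER,
    sentiment analysis and feature extraction.

    For a single input, a sentiment-analysis pipeline typically returns something like
    [{"label": "POSITIVE", "score": 0.99}], while NER returns one dict per detected entity.
    """
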
    def _test_mono_column_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
        self.assertIsNotNone(nlp)

        # A single input should return a list of results; each result is either a dict of
        # predictions or a list of dicts (e.g. one dict per recognized entity for NER).
        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, list)
        self.assertIsInstance(mono_result[0], (dict, list))

        if isinstance(mono_result[0], list):
            mono_result = mono_result[0]

        for key in output_keys:
            self.assertIn(key, mono_result[0])

        # A list of inputs should likewise return a list of results, each carrying the
        # mandatory output keys.
        multi_result = nlp(valid_inputs)
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], (dict, list))

        if isinstance(multi_result[0], list):
            multi_result = multi_result[0]

        for result in multi_result:
            for key in output_keys:
                self.assertIn(key, result)

        # Invalid inputs (e.g. None) are expected to raise an exception.
        self.assertRaises(Exception, nlp, invalid_inputs)

    @require_torch
    def test_ner(self):
        mandatory_keys = {"entity", "word", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in NER_FINETUNED_MODELS:
            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_tf
    def test_tf_ner(self):
        mandatory_keys = {"entity", "word", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_NER_FINETUNED_MODELS:
            nlp = pipeline(task="ner", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
    def test_sentiment_analysis(self):
        mandatory_keys = {"label"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_tf
    def test_tf_sentiment_analysis(self):
        mandatory_keys = {"label"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
    def test_features_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})

    @require_tf
    def test_tf_features_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})


class MultiColumnInputTestCase(unittest.TestCase):
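    """Tests for pipelines whose samples are dicts of named inputs, e.g. question answering,
    where each {"question": ..., "context": ...} sample yields a dict with "score", "answer",
    "start" and "end" keys.
    """
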
    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
        self.assertIsNotNone(nlp)

        # A single sample (dict of named inputs) should return a single result dict with the
        # expected output keys.
        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, dict)

        for key in output_keys:
            self.assertIn(key, mono_result)

        # A list of samples should return a list of result dicts.
        multi_result = nlp(valid_inputs)
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], dict)

        for result in multi_result:
            for key in output_keys:
                self.assertIn(key, result)

        # Malformed samples (empty or None fields) should raise, both individually and as a batch.
        self.assertRaises(Exception, nlp, invalid_inputs[0])
        self.assertRaises(Exception, nlp, invalid_inputs)

    @require_torch
    def test_question_answering(self):
        mandatory_output_keys = {"score", "answer", "start", "end"}
        valid_samples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {
                "question": "In what field is HuggingFace working ?",
                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
            },
        ]
        invalid_samples = [
            {"question": "", "context": "This is a test to try empty question edge case"},
            {"question": None, "context": "This is a test to try empty question edge case"},
            {"question": "What does it do with an empty context ?", "context": ""},
            {"question": "What does it do with an empty context ?", "context": None},
        ]

        for tokenizer, model, config in QA_FINETUNED_MODELS:
            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)

    @require_tf
    def test_tf_question_answering(self):
        mandatory_output_keys = {"score", "answer", "start", "end"}
        valid_samples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {
                "question": "In what field is HuggingFace working ?",
                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
            },
        ]
        invalid_samples = [
            {"question": "", "context": "This is a test to try empty question edge case"},
            {"question": None, "context": "This is a test to try empty question edge case"},
            {"question": "What does it do with an empty context ?", "context": ""},
            {"question": "What does it do with an empty context ?", "context": None},
        ]

        for tokenizer, model, config in TF_QA_FINETUNED_MODELS:
            nlp = pipeline(task="question-answering", model=model, config=config, tokenizer=tokenizer)
            self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
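

if __name__ == "__main__":
    # Convenience entry point so this module can be run directly; the suite is normally
    # collected by the project's test runner (e.g. pytest).
    unittest.main()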