test_modeling_tf_bert.py 15.1 KB
Newer Older
thomwolf's avatar
thomwolf committed
1
# coding=utf-8
Sylvain Gugger's avatar
Sylvain Gugger committed
2
# Copyright 2020 The HuggingFace Team. All rights reserved.
thomwolf's avatar
thomwolf committed
3
4
5
6
7
8
9
10
11
12
13
14
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Aymeric Augustin's avatar
Aymeric Augustin committed
15

thomwolf's avatar
thomwolf committed
16

17
18
import unittest

Aymeric Augustin's avatar
Aymeric Augustin committed
19
from transformers import BertConfig, is_tf_available
20
from transformers.testing_utils import require_tf, slow
thomwolf's avatar
thomwolf committed
21

22
from .test_configuration_common import ConfigTester
23
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
thomwolf's avatar
thomwolf committed
24

thomwolf's avatar
thomwolf committed
25

thomwolf's avatar
thomwolf committed
26
if is_tf_available():
thomwolf's avatar
thomwolf committed
27
    import tensorflow as tf
28

29
    from transformers import TF_MODEL_FOR_PRETRAINING_MAPPING
Sylvain Gugger's avatar
Sylvain Gugger committed
30
    from transformers.models.bert.modeling_tf_bert import (
31
        TFBertForMaskedLM,
32
        TFBertForMultipleChoice,
33
34
        TFBertForNextSentencePrediction,
        TFBertForPreTraining,
35
        TFBertForQuestionAnswering,
36
37
        TFBertForSequenceClassification,
        TFBertForTokenClassification,
38
39
        TFBertLMHeadModel,
        TFBertModel,
40
    )
thomwolf's avatar
thomwolf committed
41

thomwolf's avatar
thomwolf committed
42

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class TFBertModelTester:
    """Helper that builds a small ``BertConfig`` plus input tensors and checks
    the output shapes of the TF BERT model classes.

    The ``parent`` object is used only for its ``assert*`` methods, so any
    ``unittest.TestCase`` works.
    """

    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_input_mask=True,
        use_token_type_ids=True,
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        scope=None,
    ):
        """Store the test hyper-parameters on the instance.

        Every keyword argument is kept under an attribute of the same name.
        The defaults match the values this tester previously hard-coded, so
        ``TFBertModelTester(parent)`` is unchanged; explicitly passed values
        are now honoured instead of being silently discarded.
        """
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_input_mask = use_input_mask
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.num_choices = num_choices
        self.scope = scope

    def prepare_config_and_inputs(self):
        """Build a ``BertConfig`` and the tensors shared by all checks.

        Returns:
            A 7-tuple ``(config, input_ids, token_type_ids, input_mask,
            sequence_labels, token_labels, choice_labels)``. ``input_mask``,
            ``token_type_ids`` and the three label entries are ``None`` when
            the corresponding ``use_*`` flag is falsy.
        """
        # NOTE(review): ids_tensor is imported from test_modeling_tf_common;
        # presumably it returns random integer ids below the given vocab size
        # with the requested shape -- confirm against that helper.
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = BertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def create_and_check_bert_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Call ``TFBertModel`` with dict, list and bare-tensor inputs and check output shapes."""
        model = TFBertModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        # The two-name unpacking only succeeds if the output yields exactly two items.
        sequence_output, pooled_output = model(inputs)

        # Same model, inputs passed as a positional list.
        inputs = [input_ids, input_mask]
        result = model(inputs)

        # Same model, only the input ids; the shape assertions use this last result.
        result = model(input_ids)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
        self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))

    def create_and_check_bert_lm_head(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check the logits shape of ``TFBertLMHeadModel``; sets ``config.is_decoder = True`` on the passed config."""
        config.is_decoder = True
        model = TFBertLMHeadModel(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }
        prediction_scores = model(inputs)["logits"]
        self.parent.assertListEqual(
            list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_bert_for_masked_lm(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that ``TFBertForMaskedLM`` logits have shape (batch, seq, vocab)."""
        model = TFBertForMaskedLM(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

    def create_and_check_bert_for_next_sequence_prediction(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that ``TFBertForNextSentencePrediction`` logits have shape (batch, 2)."""
        model = TFBertForNextSentencePrediction(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))

    def create_and_check_bert_for_pretraining(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check both output heads of ``TFBertForPreTraining`` for the expected shapes."""
        model = TFBertForPreTraining(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)
        self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
        self.parent.assertEqual(result.seq_relationship_logits.shape, (self.batch_size, 2))

    def create_and_check_bert_for_sequence_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that ``TFBertForSequenceClassification`` logits have shape (batch, num_labels).

        Sets ``config.num_labels`` on the passed config before building the model.
        """
        config.num_labels = self.num_labels
        model = TFBertForSequenceClassification(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }

        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))

    def create_and_check_bert_for_multiple_choice(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that ``TFBertForMultipleChoice`` logits have shape (batch, num_choices).

        Sets ``config.num_choices`` on the passed config before building the model.
        """
        config.num_choices = self.num_choices
        model = TFBertForMultipleChoice(config=config)
        # Insert a new axis 1 and repeat each tensor num_choices times along it.
        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))

    def create_and_check_bert_for_token_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that ``TFBertForTokenClassification`` logits have shape (batch, seq, num_labels).

        Sets ``config.num_labels`` on the passed config before building the model.
        """
        config.num_labels = self.num_labels
        model = TFBertForTokenClassification(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

    def create_and_check_bert_for_question_answering(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that ``TFBertForQuestionAnswering`` start/end logits have shape (batch, seq)."""
        model = TFBertForQuestionAnswering(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }

        result = model(inputs)
        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

    def prepare_config_and_inputs_for_common(self):
        """Return ``(config, inputs_dict)``.

        ``inputs_dict`` holds the ``input_ids``, ``token_type_ids`` and
        ``attention_mask`` entries from ``prepare_config_and_inputs``; the
        label tensors are discarded.
        """
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
thomwolf's avatar
thomwolf committed
256
257


258
259
@require_tf
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
    """Unit tests for the TF BERT model classes.

    Each ``test_*`` method builds a config and inputs through
    ``TFBertModelTester`` and delegates the shape checks to it. Tests that
    apply to every model class come from ``TFModelTesterMixin``.
    """

    # Model classes this test case covers; empty when TensorFlow is unavailable
    # so the class body can still be evaluated.
    all_model_classes = (
        (
            TFBertModel,
            TFBertForMaskedLM,
            TFBertLMHeadModel,
            TFBertForNextSentencePrediction,
            TFBertForPreTraining,
            TFBertForQuestionAnswering,
            TFBertForSequenceClassification,
            TFBertForTokenClassification,
            TFBertForMultipleChoice,
        )
        if is_tf_available()
        else ()
    )
    test_head_masking = False

    # special case for ForPreTraining model
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        """Extend the mixin's prepared inputs with ``next_sentence_label``.

        The extra all-zeros int32 label of length ``batch_size`` is added only
        when labels are requested and ``model_class`` is one of the values of
        ``TF_MODEL_FOR_PRETRAINING_MAPPING``.
        """
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

        if return_labels and model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values():
            inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)

        return inputs_dict

    def setUp(self):
        """Create the model tester and the config tester used by the tests."""
        self.model_tester = TFBertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_bert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)

    def test_for_causal_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_lm_head(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)

    def test_for_next_sequence_prediction(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)

    def test_model_from_pretrained(self):
        """Loading the ``jplu/tiny-tf-bert-random`` checkpoint must yield a model object."""
        model = TFBertModel.from_pretrained("jplu/tiny-tf-bert-random")
        self.assertIsNotNone(model)

    def test_model_common_attributes(self):
        """Check embedding and bias accessors on every model class.

        The classes in ``list_lm_models`` must expose a Keras layer as output
        embeddings and a dict of ``tf.Variable`` as bias; every other class
        must return ``None`` for both.
        """
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = [TFBertForMaskedLM, TFBertForPreTraining, TFBertLMHeadModel]

        for model_class in self.all_model_classes:
            model = model_class(config)
            # unittest assertions (not bare ``assert``) so the checks still run
            # under ``python -O`` and report the offending value on failure.
            self.assertIsInstance(model.get_input_embeddings(), tf.keras.layers.Layer)

            if model_class in list_lm_models:
                self.assertIsInstance(model.get_output_embeddings(), tf.keras.layers.Layer)
                bias = model.get_bias()
                self.assertIsInstance(bias, dict)
                for bias_value in bias.values():
                    self.assertIsInstance(bias_value, tf.Variable)
            else:
                self.assertIsNone(model.get_output_embeddings())
                self.assertIsNone(model.get_bias())

    def test_custom_load_tf_weights(self):
        """Load a token-classification head from ``jplu/tiny-tf-bert-random``.

        No key may be reported as unexpected, and every missing key must start
        with ``dropout`` or ``classifier`` (the text before the first ``_``).
        """
        _, output_loading_info = TFBertForTokenClassification.from_pretrained(
            "jplu/tiny-tf-bert-random", output_loading_info=True
        )
        self.assertEqual(sorted(output_loading_info["unexpected_keys"]), [])
        for layer in output_loading_info["missing_keys"]:
            self.assertIn(layer.split("_")[0], ["dropout", "classifier"])
364


365
@require_tf
class TFBertModelIntegrationTest(unittest.TestCase):
    """Integration test that runs a pretrained checkpoint and compares its output to stored values."""

    @slow
    def test_inference_masked_lm(self):
        """Run ``lysandre/tiny-bert-random`` on a fixed input and check its first output.

        The first output must have shape ``[1, 6, 32000]`` and its leading
        3x3 slice must match the stored reference values within ``atol=1e-4``.
        """
        model = TFBertForPreTraining.from_pretrained("lysandre/tiny-bert-random")
        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
        # NOTE(review): index 0 is presumably the prediction logits of the
        # pre-training output -- confirm against TFBertForPreTraining.
        output = model(input_ids)[0]

        expected_shape = [1, 6, 32000]
        self.assertEqual(output.shape, expected_shape)

        # Reference values for output[:, :3, :3].
        expected_slice = tf.constant(
            [
                [
                    [-0.05243197, -0.04498899, 0.05512108],
                    [-0.07444685, -0.01064632, 0.04352357],
                    [-0.05020351, 0.05530146, 0.00700043],
                ]
            ]
        )
        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)