# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import random
import unittest

from transformers import is_torch_available

from .test_configuration_common import ConfigTester
from .test_modeling_common import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    import torch

    from transformers import (
        XLNetConfig,
        XLNetModel,
        XLNetLMHeadModel,
        XLNetForSequenceClassification,
        XLNetForTokenClassification,
        XLNetForQuestionAnswering,
    )
    from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP


@require_torch
class XLNetModelTest(CommonTestCases.CommonModelTester):
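    """Test suite for the PyTorch XLNet models: runs the shared common-model tests plus the XLNet-specific checks below."""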

    all_model_classes = (
        (
            XLNetModel,
            XLNetLMHeadModel,
            XLNetForTokenClassification,
            XLNetForSequenceClassification,
            XLNetForQuestionAnswering,
        )
        if is_torch_available()
        else ()
    )
    test_pruning = False

    class XLNetModelTester(object):
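        """Helper that builds a small random XLNetConfig and the input tensors shared by the per-model checks."""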
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            mem_len=10,
            clamp_len=-1,
            reuse_len=15,
            is_training=True,
            use_labels=True,
            vocab_size=99,
            cutoffs=[10, 50, 80],
            hidden_size=32,
            num_attention_heads=4,
            d_inner=128,
            num_hidden_layers=5,
            type_sequence_label_size=2,
            untie_r=True,
            bi_data=False,
            same_length=False,
            initializer_range=0.05,
            seed=1,
            type_vocab_size=2,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.mem_len = mem_len
            # self.key_len = seq_length + mem_len
            self.clamp_len = clamp_len
            self.reuse_len = reuse_len
            self.is_training = is_training
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.cutoffs = cutoffs
            self.hidden_size = hidden_size
            self.num_attention_heads = num_attention_heads
            self.d_inner = d_inner
            self.num_hidden_layers = num_hidden_layers
            self.bi_data = bi_data
            self.untie_r = untie_r
            self.same_length = same_length
            self.initializer_range = initializer_range
            self.seed = seed
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size

        def prepare_config_and_inputs(self):
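            """Build an XLNetConfig plus random input ids, masks, permutation tensors, and optional labels."""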
            input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
            input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()

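            # Inputs for the permutation-LM path: a sequence with one extra position, a permutation mask
            # hiding the last token from the other tokens, and a target mapping selecting that last token.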
            input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
            perm_mask = torch.zeros(
                self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device
            )
            perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
            target_mapping = torch.zeros(
                self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device
            )
            target_mapping[:, 0, -1] = 1.0  # predict last token

            sequence_labels = None
            lm_labels = None
            is_impossible_labels = None
            token_labels = None
            if self.use_labels:
                lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                is_impossible_labels = ids_tensor([self.batch_size], 2).float()
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            config = XLNetConfig(
                vocab_size=self.vocab_size,
                d_model=self.hidden_size,
                n_head=self.num_attention_heads,
                d_inner=self.d_inner,
                n_layer=self.num_hidden_layers,
                untie_r=self.untie_r,
                mem_len=self.mem_len,
                clamp_len=self.clamp_len,
                same_length=self.same_length,
                reuse_len=self.reuse_len,
                bi_data=self.bi_data,
                initializer_range=self.initializer_range,
                num_labels=self.type_sequence_label_size,
            )

            return (
                config,
                input_ids_1,
                input_ids_2,
                input_ids_q,
                perm_mask,
                input_mask,
                target_mapping,
                segment_ids,
                lm_labels,
                sequence_labels,
                is_impossible_labels,
                token_labels,
            )

        def set_seed(self):
            random.seed(self.seed)
            torch.manual_seed(self.seed)

        def create_and_check_xlnet_base_model(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
            model = XLNetModel(config)
            model.to(torch_device)
            model.eval()

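            # Exercise both mask keyword arguments accepted by the model, then token_type_ids, then plain input ids.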
            _, _ = model(input_ids_1, input_mask=input_mask)
            _, _ = model(input_ids_1, attention_mask=input_mask)
            _, _ = model(input_ids_1, token_type_ids=segment_ids)
            outputs, mems_1 = model(input_ids_1)

            result = {
                "mems_1": mems_1,
                "outputs": outputs,
            }

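            # With mem_len set to 0 the model should return only the hidden states, without mems.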
            config.mem_len = 0
            model = XLNetModel(config)
            model.to(torch_device)
            model.eval()
            no_mems_outputs = model(input_ids_1)
            self.parent.assertEqual(len(no_mems_outputs), 1)

            self.parent.assertListEqual(
                list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_base_model_with_att_output(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
            model = XLNetModel(config)
            model.to(torch_device)
            model.eval()

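            # With output_attentions enabled and a target_mapping supplied, each layer returns a pair of attention tensors (one per stream).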
            _, _, attentions = model(input_ids_1, target_mapping=target_mapping)

            self.parent.assertEqual(len(attentions), config.n_layer)
            self.parent.assertIsInstance(attentions[0], tuple)
            self.parent.assertEqual(len(attentions[0]), 2)
            self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape)

        def create_and_check_xlnet_lm_head(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
            model = XLNetLMHeadModel(config)
            model.to(torch_device)
            model.eval()

            loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)

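            # Feed the mems returned by the first pass back in to exercise memory reuse.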
            loss_2, all_logits_2, mems_2 = model(
                input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1
            )

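            # Partial-prediction pass: only the token selected by target_mapping is predicted.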
            logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)

            result = {
                "loss_1": loss_1,
                "mems_1": mems_1,
                "all_logits_1": all_logits_1,
                "loss_2": loss_2,
                "mems_2": mems_2,
                "all_logits_2": all_logits_2,
            }

            self.parent.assertListEqual(list(result["loss_1"].size()), [])
            self.parent.assertListEqual(
                list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

            self.parent.assertListEqual(list(result["loss_2"].size()), [])
            self.parent.assertListEqual(
                list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_qa(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
            model = XLNetForQuestionAnswering(config)
            model.to(torch_device)
            model.eval()

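            # Without labels the QA head returns top start/end candidates, the answer-class logits, and mems.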
            outputs = model(input_ids_1)
            start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems = outputs

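            # With full supervision (start/end positions, cls_index, is_impossible and an optional p_mask)
            # the head collapses its outputs into a single total loss plus mems.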
            outputs = model(
                input_ids_1,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
                cls_index=sequence_labels,
                is_impossible=is_impossible_labels,
                p_mask=input_mask,
            )

            outputs = model(
                input_ids_1,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
                cls_index=sequence_labels,
                is_impossible=is_impossible_labels,
            )

            total_loss, mems = outputs

            outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels)

            total_loss, mems = outputs

            result = {
                "loss": total_loss,
                "start_top_log_probs": start_top_log_probs,
                "start_top_index": start_top_index,
                "end_top_log_probs": end_top_log_probs,
                "end_top_index": end_top_index,
                "cls_logits": cls_logits,
                "mems": mems,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top]
            )
            self.parent.assertListEqual(
                list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top]
            )
            self.parent.assertListEqual(
                list(result["end_top_log_probs"].size()),
                [self.batch_size, model.config.start_n_top * model.config.end_n_top],
            )
            self.parent.assertListEqual(
                list(result["end_top_index"].size()),
                [self.batch_size, model.config.start_n_top * model.config.end_n_top],
            )
            self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_token_classif(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
            model = XLNetForTokenClassification(config)
            model.to(torch_device)
            model.eval()

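            # Without labels the head returns per-token logits and mems; passing labels adds a loss in front.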
            logits, mems_1 = model(input_ids_1)
            loss, logits, mems_1 = model(input_ids_1, labels=token_labels)

            result = {
                "loss": loss,
                "mems_1": mems_1,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_sequence_classif(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
            model = XLNetForSequenceClassification(config)
            model.to(torch_device)
            model.eval()

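            # Without labels the head returns sequence-level logits and mems; passing labels adds a loss in front.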
            logits, mems_1 = model(input_ids_1)
            loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)

            result = {
                "loss": loss,
                "mems_1": mems_1,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids_1,
                input_ids_2,
                input_ids_q,
                perm_mask,
                input_mask,
                target_mapping,
                segment_ids,
                lm_labels,
                sequence_labels,
                is_impossible_labels,
                token_labels,
            ) = config_and_inputs
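            # Only input_ids are exposed to the shared common tests; the other tensors are used by the XLNet-specific checks above.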
            inputs_dict = {"input_ids": input_ids_1}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = XLNetModelTest.XLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_xlnet_base_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)

    def test_xlnet_base_model_with_att_output(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        config_and_inputs[0].output_attentions = True
        self.model_tester.create_and_check_xlnet_base_model_with_att_output(*config_and_inputs)

    def test_xlnet_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

    def test_xlnet_sequence_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)

    def test_xlnet_token_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_token_classif(*config_and_inputs)

    def test_xlnet_qa(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = XLNetModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()