# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import random

from transformers import is_torch_available

from .test_configuration_common import ConfigTester
from .test_modeling_common import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    import torch

    from transformers import (
        XLNetConfig,
        XLNetModel,
        XLNetLMHeadModel,
        XLNetForSequenceClassification,
        XLNetForTokenClassification,
        XLNetForQuestionAnswering,
    )
    from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP


@require_torch
class XLNetModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (
        (
            XLNetModel,
            XLNetLMHeadModel,
            XLNetForTokenClassification,
            XLNetForSequenceClassification,
            XLNetForQuestionAnswering,
        )
        if is_torch_available()
        else ()
    )
    test_pruning = False

    class XLNetModelTester(object):
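        # Helper that builds a small XLNetConfig plus random inputs, then checks output shapes for each XLNet model class.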
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            mem_len=10,
            clamp_len=-1,
            reuse_len=15,
            is_training=True,
            use_labels=True,
            vocab_size=99,
            cutoffs=[10, 50, 80],
            hidden_size=32,
            num_attention_heads=4,
            d_inner=128,
            num_hidden_layers=5,
            type_sequence_label_size=2,
            untie_r=True,
            bi_data=False,
            same_length=False,
            initializer_range=0.05,
            seed=1,
            type_vocab_size=2,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.mem_len = mem_len
            # self.key_len = seq_length + mem_len
            self.clamp_len = clamp_len
            self.reuse_len = reuse_len
            self.is_training = is_training
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.cutoffs = cutoffs
            self.hidden_size = hidden_size
            self.num_attention_heads = num_attention_heads
            self.d_inner = d_inner
            self.num_hidden_layers = num_hidden_layers
            self.bi_data = bi_data
            self.untie_r = untie_r
            self.same_length = same_length
            self.initializer_range = initializer_range
            self.seed = seed
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size

        def prepare_config_and_inputs(self):
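            # Build random inputs: two token batches, a longer query batch with perm_mask/target_mapping for
            # predicting the last token, masks, optional labels, and a small XLNetConfig.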
            input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
            input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()

            input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
            perm_mask = torch.zeros(
                self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device
            )
            perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
            target_mapping = torch.zeros(
                self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device
            )
            target_mapping[:, 0, -1] = 1.0  # predict last token

            sequence_labels = None
            lm_labels = None
            is_impossible_labels = None
            token_labels = None
            if self.use_labels:
                lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                is_impossible_labels = ids_tensor([self.batch_size], 2).float()
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            config = XLNetConfig(
                vocab_size=self.vocab_size,
                d_model=self.hidden_size,
                n_head=self.num_attention_heads,
                d_inner=self.d_inner,
                n_layer=self.num_hidden_layers,
                untie_r=self.untie_r,
                mem_len=self.mem_len,
                clamp_len=self.clamp_len,
                same_length=self.same_length,
                reuse_len=self.reuse_len,
                bi_data=self.bi_data,
                initializer_range=self.initializer_range,
                num_labels=self.type_sequence_label_size,
            )

            return (
                config,
                input_ids_1,
                input_ids_2,
                input_ids_q,
                perm_mask,
                input_mask,
                target_mapping,
                segment_ids,
                lm_labels,
                sequence_labels,
                is_impossible_labels,
                token_labels,
            )

        def set_seed(self):
            random.seed(self.seed)
            torch.manual_seed(self.seed)

        def create_and_check_xlnet_base_model(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
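            # Run the base model with the input_mask/attention_mask/token_type_ids variants and check the
            # hidden-state and mems shapes; with mem_len=0 the model returns no mems.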
            model = XLNetModel(config)
            model.to(torch_device)
            model.eval()

            _, _ = model(input_ids_1, input_mask=input_mask)
            _, _ = model(input_ids_1, attention_mask=input_mask)
            _, _ = model(input_ids_1, token_type_ids=segment_ids)
            outputs, mems_1 = model(input_ids_1)

            result = {
                "mems_1": mems_1,
                "outputs": outputs,
            }

            config.mem_len = 0
            model = XLNetModel(config)
            model.to(torch_device)
            model.eval()
            no_mems_outputs = model(input_ids_1)
            self.parent.assertEqual(len(no_mems_outputs), 1)

            self.parent.assertListEqual(
                list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_base_model_with_att_output(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
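            # With config.output_attentions enabled and target_mapping passed, each layer yields a 2-tuple of
            # attention tensors (one per stream).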
            model = XLNetModel(config)
            model.to(torch_device)
            model.eval()

            _, _, attentions = model(input_ids_1, target_mapping=target_mapping)

            self.parent.assertEqual(len(attentions), config.n_layer)
            self.parent.assertIsInstance(attentions[0], tuple)
            self.parent.assertEqual(len(attentions[0]), 2)
            self.parent.assertIsInstance(attentions[0][0], torch.Tensor)
            self.parent.assertIsInstance(attentions[0][1], torch.Tensor)

        def create_and_check_xlnet_lm_head(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
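            # Check LM loss/logits shapes for a first batch, a second batch fed the cached mems, and a
            # partial-prediction call using perm_mask/target_mapping.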
            model = XLNetLMHeadModel(config)
            model.to(torch_device)
            model.eval()

            loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)

            loss_2, all_logits_2, mems_2 = model(
                input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1
            )

            logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)

            result = {
                "loss_1": loss_1,
                "mems_1": mems_1,
                "all_logits_1": all_logits_1,
                "loss_2": loss_2,
                "mems_2": mems_2,
                "all_logits_2": all_logits_2,
            }

            self.parent.assertListEqual(list(result["loss_1"].size()), [])
            self.parent.assertListEqual(
                list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

            self.parent.assertListEqual(list(result["loss_2"].size()), [])
            self.parent.assertListEqual(
                list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_qa(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
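            # Run the QA head with the full set of optional inputs (cls_index, is_impossible, p_mask), then with
            # fewer of them, and check the top start/end outputs against start_n_top/end_n_top.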
            model = XLNetForQuestionAnswering(config)
            model.to(torch_device)
            model.eval()

            outputs = model(input_ids_1)
            start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems = outputs

            outputs = model(
                input_ids_1,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
                cls_index=sequence_labels,
                is_impossible=is_impossible_labels,
                p_mask=input_mask,
            )

            outputs = model(
                input_ids_1,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
                cls_index=sequence_labels,
                is_impossible=is_impossible_labels,
            )

            total_loss, mems = outputs

            outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels)

            total_loss, mems = outputs

            result = {
                "loss": total_loss,
                "start_top_log_probs": start_top_log_probs,
                "start_top_index": start_top_index,
                "end_top_log_probs": end_top_log_probs,
                "end_top_index": end_top_index,
                "cls_logits": cls_logits,
                "mems": mems,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top]
            )
            self.parent.assertListEqual(
                list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top]
            )
            self.parent.assertListEqual(
                list(result["end_top_log_probs"].size()),
                [self.batch_size, model.config.start_n_top * model.config.end_n_top],
            )
            self.parent.assertListEqual(
                list(result["end_top_index"].size()),
                [self.batch_size, model.config.start_n_top * model.config.end_n_top],
            )
            self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_token_classif(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
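            # Token classification: per-token logits over num_labels (type_sequence_label_size here) plus the usual mems.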
            model = XLNetForTokenClassification(config)
            model.to(torch_device)
            model.eval()

            logits, mems_1 = model(input_ids_1)
            loss, logits, mems_1 = model(input_ids_1, labels=token_labels)

            result = {
                "loss": loss,
                "mems_1": mems_1,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_xlnet_sequence_classif(
            self,
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ):
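            # Sequence classification: one logits vector of size type_sequence_label_size per example plus the usual mems.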
            model = XLNetForSequenceClassification(config)
            model.to(torch_device)
            model.eval()

            logits, mems_1 = model(input_ids_1)
            loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)

            result = {
                "loss": loss,
                "mems_1": mems_1,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def prepare_config_and_inputs_for_common(self):
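            # The shared model tests only consume input_ids; the remaining prepared inputs are discarded here.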
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids_1,
                input_ids_2,
                input_ids_q,
                perm_mask,
                input_mask,
                target_mapping,
                segment_ids,
                lm_labels,
                sequence_labels,
                is_impossible_labels,
                token_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids_1}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = XLNetModelTest.XLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_xlnet_base_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)

    def test_xlnet_base_model_with_att_output(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        config_and_inputs[0].output_attentions = True
        self.model_tester.create_and_check_xlnet_base_model_with_att_output(*config_and_inputs)

    def test_xlnet_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

    def test_xlnet_sequence_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)

    def test_xlnet_token_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_token_classif(*config_and_inputs)

    def test_xlnet_qa(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = XLNetModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)