# coding=utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import inspect
import random
import unittest

from transformers import XLNetConfig, is_tf_available
from transformers.testing_utils import require_tf, slow

from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin


# TensorFlow-dependent names are only imported when TensorFlow is available.
if is_tf_available():
    import tensorflow as tf

    from transformers.models.xlnet.modeling_tf_xlnet import (
        TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST,
        TFXLNetForMultipleChoice,
        TFXLNetForQuestionAnsweringSimple,
        TFXLNetForSequenceClassification,
        TFXLNetForTokenClassification,
        TFXLNetLMHeadModel,
        TFXLNetModel,
    )


class TFXLNetModelTester:
    """Build a small XLNet config with random inputs and check TF XLNet model outputs.

    Every ``create_and_check_*`` method receives the tuple returned by
    ``prepare_config_and_inputs`` unpacked positionally, which is why they all share
    the same eleven-argument signature even when a check only uses a few of them.
    Assertions are reported through ``self.parent`` (the owning test case).
    """

    def __init__(
        self,
        parent,
    ):
        """Store the owning test case and the fixed hyper-parameters used by the checks.

        Args:
            parent: object exposing ``assertEqual`` / ``assertListEqual`` used for all checks.
        """
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.mem_len = 10
        # self.key_len = seq_length + mem_len
        self.clamp_len = -1
        self.reuse_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.num_attention_heads = 4
        self.d_inner = 128
        self.num_hidden_layers = 5
        self.type_sequence_label_size = 2
        self.untie_r = True
        self.bi_data = False
        self.same_length = False
        self.initializer_range = 0.05
        self.seed = 1
        self.type_vocab_size = 2
        self.bos_token_id = 1
        self.eos_token_id = 2
        self.pad_token_id = 5
        self.num_choices = 4

    def prepare_config_and_inputs(self):
        """Create an ``XLNetConfig`` and the random tensors consumed by the checks.

        Returns:
            A tuple ``(config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
            target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels)``.
            The three label entries are ``None`` when ``self.use_labels`` is false.
        """
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)

        # The query inputs are one token longer than the regular inputs.
        input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
        # perm_mask is all zeros except for a last column of ones,
        # i.e. perm_mask[:, :, -1] = 1.0: previous tokens don't see the last token.
        perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
        perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32)
        perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)
        # target_mapping is all zeros except for the last position,
        # i.e. target_mapping[:, 0, -1] = 1.0: predict the last token.
        target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32)
        target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32)
        target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)

        sequence_labels = None
        lm_labels = None
        is_impossible_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)

        config = XLNetConfig(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            n_head=self.num_attention_heads,
            d_inner=self.d_inner,
            n_layer=self.num_hidden_layers,
            untie_r=self.untie_r,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            same_length=self.same_length,
            reuse_len=self.reuse_len,
            bi_data=self.bi_data,
            initializer_range=self.initializer_range,
            num_labels=self.type_sequence_label_size,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
            eos_token_id=self.eos_token_id,
        )

        return (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
        )

    def set_seed(self):
        """Seed Python's ``random`` module and TensorFlow with ``self.seed``."""
        random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def create_and_check_xlnet_base_model(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        """Run ``TFXLNetModel`` with dict and list inputs and check the output shapes."""
        model = TFXLNetModel(config)

        # Dict-style inputs; the result is only exercised, the shapes are checked below
        # on the list-style call.
        inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
        result = model(inputs)

        inputs = [input_ids_1, input_mask]
        result = model(inputs)

        # With `use_mems_eval` disabled a freshly built model returns a single output.
        config.use_mems_eval = False
        model = TFXLNetModel(config)
        no_mems_outputs = model(inputs)
        self.parent.assertEqual(len(no_mems_outputs), 1)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size, self.hidden_size)] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_lm_head(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        """Run ``TFXLNetLMHeadModel`` three ways and check logits and memory shapes.

        The second call feeds the memories returned by the first call; the third call
        uses the query inputs together with ``perm_mask`` and ``target_mapping``.
        """
        model = TFXLNetLMHeadModel(config)

        inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}
        all_logits_1, mems_1 = model(inputs_1).to_tuple()

        inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}
        all_logits_2, mems_2 = model(inputs_2).to_tuple()

        # Only exercised; its outputs are not asserted on.
        inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}
        logits, _ = model(inputs_3).to_tuple()

        self.parent.assertEqual(all_logits_1.shape, (self.batch_size, self.seq_length, self.vocab_size))
        self.parent.assertListEqual(
            [mem.shape for mem in mems_1],
            [(self.seq_length, self.batch_size, self.hidden_size)] * self.num_hidden_layers,
        )
        self.parent.assertEqual(all_logits_2.shape, (self.batch_size, self.seq_length, self.vocab_size))
        # After reusing memories, the first dimension of each memory is `mem_len`.
        self.parent.assertListEqual(
            [mem.shape for mem in mems_2],
            [(self.mem_len, self.batch_size, self.hidden_size)] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_qa(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        """Run ``TFXLNetForQuestionAnsweringSimple`` and check start/end logits and memory shapes."""
        model = TFXLNetForQuestionAnsweringSimple(config)

        inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids}
        result = model(inputs)

        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size, self.hidden_size)] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_sequence_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        """Run ``TFXLNetForSequenceClassification`` on bare input ids and check output shapes."""
        model = TFXLNetForSequenceClassification(config)

        result = model(input_ids_1)

        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size, self.hidden_size)] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_for_token_classification(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        """Run ``TFXLNetForTokenClassification`` and check logits and memory shapes.

        Note that ``config.num_labels`` is overwritten with the sequence length of
        ``input_ids_1`` before the model is built.
        """
        config.num_labels = input_ids_1.shape[1]
        model = TFXLNetForTokenClassification(config)
        inputs = {
            "input_ids": input_ids_1,
            "attention_mask": input_mask,
            # 'token_type_ids': token_type_ids
        }
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, config.num_labels))
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size, self.hidden_size)] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_for_multiple_choice(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        """Run ``TFXLNetForMultipleChoice`` on inputs tiled over the choices and check shapes."""
        config.num_choices = self.num_choices
        model = TFXLNetForMultipleChoice(config=config)
        # Insert a choice axis at position 1 and repeat every input `num_choices` times along it.
        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids_1, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(segment_ids, 1), (1, self.num_choices, 1))
        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        result = model(inputs)

        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
        # The memories' batch dimension is `batch_size * num_choices`.
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size * self.num_choices, self.hidden_size)] * self.num_hidden_layers,
        )

    def prepare_config_and_inputs_for_common(self):
        """Return ``(config, inputs_dict)`` where ``inputs_dict`` only holds ``input_ids``."""
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict


@require_tf
class TFXLNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Common-mixin and model-specific tests for the TF XLNet model classes."""

    all_model_classes = (
        (
            TFXLNetModel,
            TFXLNetLMHeadModel,
            TFXLNetForSequenceClassification,
            TFXLNetForTokenClassification,
            TFXLNetForQuestionAnsweringSimple,
            TFXLNetForMultipleChoice,
        )
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFXLNetLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    pipeline_model_mapping = (
        {
            "feature-extraction": TFXLNetModel,
            "question-answering": TFXLNetForQuestionAnsweringSimple,
            "text-classification": TFXLNetForSequenceClassification,
            "text-generation": TFXLNetLMHeadModel,
            "token-classification": TFXLNetForTokenClassification,
            "zero-shot": TFXLNetForSequenceClassification,
        }
        if is_tf_available()
        else {}
    )
    test_head_masking = False
    test_onnx = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Report every pipeline test as one to skip (always returns ``True``)."""
        # Exception encountered when calling layer '...'
        return True

    def setUp(self):
        self.model_tester = TFXLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_xlnet_base_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)

    def test_xlnet_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

    def test_xlnet_sequence_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)

    def test_xlnet_token_classification(self):
        # Seed like the other model tests so this one is set up the same way.
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_for_token_classification(*config_and_inputs)

    def test_xlnet_qa(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

    def test_xlnet_for_multiple_choice(self):
        # Seed like the other model tests so this one is set up the same way.
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_for_multiple_choice(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFXLNetModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    # overwrite since `TFXLNetLMHeadModel` doesn't cut logits/labels
    def test_loss_computation(self):
        """Check the loss shape when labels are passed as kwargs, as a dict and as a tuple.

        For each model class exposing ``hf_compute_loss``, the loss must either have the
        label's leading dimension as its shape or have shape ``[1]``.
        """
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)
            if getattr(model, "hf_compute_loss", None):
                # The number of elements in the loss should be the same as the number of elements in the label
                prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
                added_label = prepared_for_class[
                    sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]
                ]
                expected_loss_size = added_label.shape.as_list()[:1]

                # `TFXLNetLMHeadModel` doesn't cut logits/labels, so no causal-LM adjustment
                # (removing one label per batch because labels are shifted) is applied to the
                # expected loss size here.

                # Test that model correctly compute the loss with kwargs
                prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
                input_name = "input_ids" if "input_ids" in prepared_for_class else "pixel_values"
                input_ids = prepared_for_class.pop(input_name)

                loss = model(input_ids, **prepared_for_class)[0]
                self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])

                # Test that model correctly compute the loss with a dict
                prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
                loss = model(prepared_for_class)[0]
                self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])

                # Test that model correctly compute the loss with a tuple
                prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)

                # Get keys that were added with the _prepare_for_class function
                label_keys = prepared_for_class.keys() - inputs_dict.keys()
                signature = inspect.signature(model.call).parameters
                signature_names = list(signature.keys())

                # Create a dictionary holding the location of the tensors in the tuple
                tuple_index_mapping = {0: input_name}
                for label_key in label_keys:
                    label_key_index = signature_names.index(label_key)
                    tuple_index_mapping[label_key_index] = label_key
                sorted_tuple_index_mapping = sorted(tuple_index_mapping.items())

                # Initialize a list with their default values, update the values and convert to a tuple
                list_input = [signature[name].default for name in signature_names if name != "kwargs"]

                for index, value in sorted_tuple_index_mapping:
                    list_input[index] = prepared_for_class[value]

                tuple_input = tuple(list_input)

                # Send to model (the last positional entry is left out of the call)
                loss = model(tuple_input[:-1])[0]

                self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])


@require_tf
class TFXLNetModelLanguageGenerationTest(unittest.TestCase):
    """Slow integration test for greedy generation with the pretrained TF XLNet LM head model."""

    @slow
    def test_lm_generate_xlnet_base_cased(self):
        """Greedy generation from ``xlnet-base-cased`` must reproduce the expected token ids."""
        model = TFXLNetLMHeadModel.from_pretrained("xlnet-base-cased")
        # fmt: off
        input_ids = tf.convert_to_tensor(
            [
                [
                    67, 2840, 19, 18, 1484, 20, 965, 29077, 8719, 1273, 21, 45, 273, 17, 10, 15048, 28, 27511, 21, 4185, 11, 41, 2444, 9, 32, 1025, 20, 8719, 26, 23, 673, 966, 19, 29077, 20643, 27511, 20822, 20643, 19, 17, 6616, 17511, 18, 8978, 20, 18, 777, 9, 19233, 1527, 17669, 19, 24, 673, 17, 28756, 150, 12943, 4354, 153, 27, 442, 37, 45, 668, 21, 24, 256, 20, 416, 22, 2771, 4901, 9, 12943, 4354, 153, 51, 24, 3004, 21, 28142, 23, 65, 20, 18, 416, 34, 24, 2958, 22947, 9, 1177, 45, 668, 3097, 13768, 23, 103, 28, 441, 148, 48, 20522, 19, 12943, 4354, 153, 12860, 34, 18, 326, 27, 17492, 684, 21, 6709, 9, 8585, 123, 266, 19, 12943, 4354, 153, 6872, 24, 3004, 20, 18, 9225, 2198, 19, 12717, 103, 22, 401, 24, 6348, 9, 12943, 4354, 153, 1068, 2768, 2286, 19, 33, 104, 19, 176, 24, 9313, 19, 20086, 28, 45, 10292, 9, 4, 3,
                ]
            ],
            dtype=tf.int32,
        )
        # fmt: on

        #  In 1991, the remains of Russian Tsar Nicholas II and his family
        #  (except for Alexei and Maria) are discovered.
        #  The voice of Nicholas's young son, Tsarevich Alexei Nikolaevich, narrates the
        #  remainder of the story. 1883 Western Siberia,
        #  a young Grigori Rasputin is asked by his father and a group of men to perform magic.
        #  Rasputin has a vision and denounces one of the men as a horse thief. Although his
        #  father initially slaps him for making such an accusation, Rasputin watches as the
        #  man is chased outside and beaten. Twenty years later, Rasputin sees a vision of
        #  the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famous,
        #  with people, even a bishop, begging for his blessing.

        # fmt: off
        expected_output_ids = [
            67, 2840, 19, 18, 1484, 20, 965, 29077, 8719, 1273, 21, 45, 273, 17, 10, 15048, 28, 27511, 21, 4185, 11, 41, 2444, 9, 32, 1025, 20, 8719, 26, 23, 673, 966, 19, 29077, 20643, 27511, 20822, 20643, 19, 17, 6616, 17511, 18, 8978, 20, 18, 777, 9, 19233, 1527, 17669, 19, 24, 673, 17, 28756, 150, 12943, 4354, 153, 27, 442, 37, 45, 668, 21, 24, 256, 20, 416, 22, 2771, 4901, 9, 12943, 4354, 153, 51, 24, 3004, 21, 28142, 23, 65, 20, 18, 416, 34, 24, 2958, 22947, 9, 1177, 45, 668, 3097, 13768, 23, 103, 28, 441, 148, 48, 20522, 19, 12943, 4354, 153, 12860, 34, 18, 326, 27, 17492, 684, 21, 6709, 9, 8585, 123, 266, 19, 12943, 4354, 153, 6872, 24, 3004, 20, 18, 9225, 2198, 19, 12717, 103, 22, 401, 24, 6348, 9, 12943, 4354, 153, 1068, 2768, 2286, 19, 33, 104, 19, 176, 24, 9313, 19, 20086, 28, 45, 10292, 9, 4, 3, 19, 12943, 4354, 153, 27, 442, 22, 2771, 4901, 9, 69, 27, 442, 22, 2771, 24, 11335, 20, 18, 9225, 2198, 9, 69, 27, 442, 22, 2771, 24, 11335, 20, 18, 9225, 2198, 9, 69, 27, 442, 22, 2771,
        ]
        # fmt: on
        #  In 1991, the remains of Russian Tsar Nicholas II and his family (except for Alexei and Maria)
        #  are discovered. The voice of Nicholas's young son, Tsarevich Alexei Nikolaevich,
        #  narrates the remainder of the story. 1883 Western Siberia, a young Grigori Rasputin
        #  is asked by his father and a group of men to perform magic. Rasputin has a vision and
        #  denounces one of the men as a horse thief. Although his father initially slaps
        #  him for making such an accusation, Rasputin watches as the man is chased outside and beaten.
        #  Twenty years later, Rasputin sees a vision of the Virgin Mary, prompting him to become a priest.
        #  Rasputin quickly becomes famous, with people, even a bishop, begging for his blessing.
        #  <sep><cls>, Rasputin is asked to perform magic. He is asked to perform a ritual of the Virgin Mary.
        #  He is asked to perform a ritual of the Virgin Mary. He is asked to perform

        # Greedy decoding (do_sample=False) up to 200 tokens in total.
        output_ids = model.generate(input_ids, max_length=200, do_sample=False)

        self.assertListEqual(output_ids[0].numpy().tolist(), expected_output_ids)