# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import copy
import os
import random
import tempfile
import unittest
from importlib import import_module

from transformers import is_tf_available, is_torch_available

from .utils import _tf_gpu_memory_limit, require_tf


if is_tf_available():
    import tensorflow as tf
    import numpy as np

    from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding, TFSharedEmbeddings

    if _tf_gpu_memory_limit is not None:
        gpus = tf.config.list_physical_devices("GPU")
        for gpu in gpus:
            # Restrict TensorFlow to only allocate _tf_gpu_memory_limit MB of memory on each GPU
            try:
                tf.config.experimental.set_virtual_device_configuration(
                    gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
                )
                logical_gpus = tf.config.experimental.list_logical_devices("GPU")
                print("Logical GPUs", logical_gpus)
            except RuntimeError as e:
                # Virtual devices must be set before GPUs have been initialized
                print(e)
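        # Note: on recent TF versions the same effect is available through the
        # stable names tf.config.set_logical_device_configuration and
        # tf.config.LogicalDeviceConfiguration; the experimental aliases above
        # are kept here for compatibility.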


def _config_zero_init(config):
    """Return a deep copy of `config` with every initializer range/std set to 0.0."""
    configs_no_init = copy.deepcopy(config)
    for key in configs_no_init.__dict__.keys():
        if "_range" in key or "_std" in key:
            setattr(configs_no_init, key, 0.0)
    return configs_no_init
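
# A minimal usage sketch (values are hypothetical): given a config with
# initializer_range=0.02, _config_zero_init(config) returns a deep copy whose
# initializer_range is 0.0, so weight initialization becomes deterministic.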


@require_tf
class TFModelTesterMixin:

    model_tester = None
    all_model_classes = ()
    all_generative_model_classes = ()
    test_torchscript = True
    test_pruning = True
    test_resize_embeddings = True
    is_encoder_decoder = False

    def test_initialization(self):
        pass
        # config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # configs_no_init = _config_zero_init(config)
        # for model_class in self.all_model_classes:
        #     model = model_class(config=configs_no_init)
        #     for name, param in model.named_parameters():
        #         if param.requires_grad:
        #             self.assertIn(param.data.mean().item(), [0.0, 1.0],
        #             msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))

    def test_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model = model_class.from_pretrained(tmpdirname)
                after_outputs = model(inputs_dict)

                self.assert_outputs_same(after_outputs, outputs)

    def test_keras_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        tf_main_layer_classes = set(
            module_member
            for model_class in self.all_model_classes
            for module in (import_module(model_class.__module__),)
            for module_member_name in dir(module)
            if module_member_name.endswith("MainLayer")
            for module_member in (getattr(module, module_member_name),)
            if isinstance(module_member, type)
            and tf.keras.layers.Layer in module_member.__bases__
            and getattr(module_member, "_keras_serializable", False)
        )
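        # The comprehension above reflects over the modules defining the model
        # classes: it keeps every class named *MainLayer that directly
        # subclasses tf.keras.layers.Layer and opted in via the
        # _keras_serializable flag, so each candidate can round-trip through
        # tf.keras.models.load_model below.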
        for main_layer_class in tf_main_layer_classes:
            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
            if "T5" in main_layer_class.__name__:
                # Take the same values as in TFT5ModelTester for this shared layer
                shared = TFSharedEmbeddings(99, 32, name="shared")
                main_layer = main_layer_class(config, embed_tokens=shared)
            else:
                main_layer = main_layer_class(config)
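            # Build symbolic Keras inputs mirroring each concrete tensor;
            # tensor.shape[1:] drops the batch dimension, which Keras manages itself.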
            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
            }

            model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
                if "T5" in main_layer_class.__name__:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class,
                            "TFSharedEmbeddings": TFSharedEmbeddings,
                        },
                    )
                else:
                    model = tf.keras.models.load_model(
                        filepath, custom_objects={main_layer_class.__name__: main_layer_class}
                    )
                assert isinstance(model, tf.keras.Model)
                after_outputs = model(inputs_dict)
                self.assert_outputs_same(after_outputs, outputs)

    def assert_outputs_same(self, after_outputs, outputs):
        # Make sure we don't have nans
        if isinstance(after_outputs, tf.Tensor):
            out_1 = after_outputs.numpy()
        else:
            out_1 = after_outputs[0].numpy()
        out_2 = outputs[0].numpy()
        self.assertEqual(out_1.shape, out_2.shape)
        out_1 = out_1[~np.isnan(out_1)]
        out_2 = out_2[~np.isnan(out_2)]
        max_diff = np.amax(np.abs(out_1 - out_2))
        self.assertLessEqual(max_diff, 1e-5)

    def test_pt_tf_model_equivalence(self):
        if not is_torch_available():
            return

        import torch
        import transformers

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True

            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions

            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=inputs_dict)
            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

            # Check that predictions on the first output (logits/hidden-states) are close enough, given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict, training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].numpy()

            # Zero out positions that are NaN in either framework so the
            # element-wise comparison below only covers well-defined values
            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))

            pt_hidden_states[tf_nans] = 0
            tf_hidden_states[tf_nans] = 0
            pt_hidden_states[pt_nans] = 0
            tf_hidden_states[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            # Debug info (remove when fixed)
            if max_diff >= 2e-2:
                print("===")
                print(model_class)
                print(config)
                print(inputs_dict)
                print(pt_inputs_dict)
            self.assertLessEqual(max_diff, 2e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check that predictions on the first output (logits/hidden-states) are close enough, given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict)
            tfo = tfo[0].numpy()
            pto = pto[0].numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))

            pto[tf_nans] = 0
            tfo[tf_nans] = 0
            pto[pt_nans] = 0
            tfo[pt_nans] = 0

            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 2e-2)

    def test_compile_tf_model(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        if self.is_encoder_decoder:
            input_ids = {
                "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"),
                "inputs": tf.keras.Input(batch_shape=(2, 2000), name="inputs", dtype="int32"),
            }
        else:
            input_ids = tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32")
        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")

        for model_class in self.all_model_classes:
            # Prepare our model
            model = model_class(config)

            # Let's load it from disk to be sure we can use pretrained weights
            with tempfile.TemporaryDirectory() as tmpdirname:
                outputs = model(inputs_dict)  # build the model
                model.save_pretrained(tmpdirname)
                model = model_class.from_pretrained(tmpdirname)

            outputs_dict = model(input_ids)
            hidden_states = outputs_dict[0]

            # Add a dense layer on top to test integration with other Keras modules
            outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)

            # Compile extended model
            extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
            extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_keyword_and_dict_args(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            outputs_dict = model(inputs_dict)

            inputs_keywords = copy.deepcopy(inputs_dict)
            input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "inputs", None,)
            outputs_keywords = model(input_ids, **inputs_keywords)
            output_dict = outputs_dict[0].numpy()
            output_keywords = outputs_keywords[0].numpy()

            self.assertLess(np.sum(np.abs(output_dict - output_keywords)), 1e-6)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        decoder_seq_length = (
            self.model_tester.decoder_seq_length
            if hasattr(self.model_tester, "decoder_seq_length")
            else self.model_tester.seq_length
        )
        encoder_seq_length = (
            self.model_tester.encoder_seq_length
            if hasattr(self.model_tester, "encoder_seq_length")
            else self.model_tester.seq_length
        )
        decoder_key_length = (
            self.model_tester.key_length if hasattr(self.model_tester, "key_length") else decoder_seq_length
        )
        encoder_key_length = (
            self.model_tester.key_length if hasattr(self.model_tester, "key_length") else encoder_seq_length
        )

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            config.output_hidden_states = False
            model = model_class(config)
            outputs = model(inputs_dict)
            attentions = [t.numpy() for t in outputs[-1]]
            self.assertEqual(model.config.output_hidden_states, False)
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
            out_len = len(outputs)
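            # For encoder-decoder models, the flat output tuple is assumed to split
            # evenly into decoder outputs followed by encoder outputs, each half
            # ending with its attentions; hence the midpoint indexing in the
            # encoder-decoder branch below.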

            if self.is_encoder_decoder:
                self.assertEqual(out_len % 2, 0)
                decoder_attentions = outputs[(out_len // 2) - 1]
                self.assertEqual(model.config.output_hidden_states, False)
                self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                )

            # Check that output attentions can also be changed via the config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(inputs_dict)
            attentions = [t.numpy() for t in outputs[-1]]
            self.assertEqual(model.config.output_hidden_states, False)
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            config.output_hidden_states = True
            model = model_class(config)
            outputs = model(inputs_dict)
            self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
            self.assertEqual(model.config.output_hidden_states, True)

            attentions = [t.numpy() for t in outputs[-1]]
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

    def test_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            config.output_hidden_states = True
            model = model_class(config)
            outputs = model(inputs_dict)
            hidden_states = [t.numpy() for t in outputs[-1]]
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
            self.assertListEqual(
                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
            )

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), (tf.keras.layers.Layer, TFAdaptiveEmbedding))
            x = model.get_output_embeddings()
            assert x is None or isinstance(x, tf.keras.layers.Layer)

    def test_determinism(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            first, second = (
                model(inputs_dict, training=False)[0],
                model(inputs_dict, training=False)[0],
            )
            out_1 = first.numpy()
            out_2 = second.numpy()
            out_1 = out_1[~np.isnan(out_1)]
            out_2 = out_2[~np.isnan(out_2)]
            max_diff = np.amax(np.abs(out_1 - out_2))
            self.assertLessEqual(max_diff, 1e-5)

    def _get_embeds(self, wte, input_ids):
        # In our TF models, the input embeddings layer accepts slightly different
        # call signatures, so we try a few of them; as a last resort we fall
        # back to synthetically creating a dummy tensor of ones.
        try:
            x = wte(input_ids, mode="embedding")
        except Exception:
            try:
                x = wte([input_ids], mode="embedding")
            except Exception:
                try:
                    x = wte([input_ids, None, None, None], mode="embedding")
                except Exception:
                    if hasattr(self.model_tester, "embedding_size"):
                        x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32,)
                    else:
                        x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32,)
        return x
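        # Note: the final fallback returns a constant all-ones tensor, which is
        # enough for the shape/graph checks in test_inputs_embeds even though
        # the values are meaningless as embeddings.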

    def test_inputs_embeds(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if not self.is_encoder_decoder:
            input_ids = inputs_dict["input_ids"]
            del inputs_dict["input_ids"]
        else:
            encoder_input_ids = inputs_dict["inputs"]
            decoder_input_ids = inputs_dict["decoder_input_ids"]
            del inputs_dict["inputs"]
            del inputs_dict["decoder_input_ids"]

        for model_class in self.all_model_classes:
            model = model_class(config)

            wte = model.get_input_embeddings()
            if not self.is_encoder_decoder:
                inputs_dict["inputs_embeds"] = self._get_embeds(wte, input_ids)
            else:
                inputs_dict["inputs_embeds"] = self._get_embeds(wte, encoder_input_ids)
                inputs_dict["decoder_inputs_embeds"] = self._get_embeds(wte, decoder_input_ids)

            model(inputs_dict)

    def test_lm_head_model_random_no_beam_search_generate(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        input_ids = inputs_dict["input_ids"] if "input_ids" in inputs_dict else inputs_dict["inputs"]

        # iterate over all generative models
        for model_class in self.all_generative_model_classes:
            model = model_class(config)

            if config.bos_token_id is None:
                # if bos token id is not defined, the model needs input_ids
                with self.assertRaises(AssertionError):
                    model.generate(do_sample=True, max_length=5)
                # num_return_sequences = 1
                self._check_generated_ids(model.generate(input_ids, do_sample=True))
            else:
                # num_return_sequences = 1
                self._check_generated_ids(model.generate(do_sample=True, max_length=5))

            with self.assertRaises(AssertionError):
                # generating multiple sequences with greedy decoding (no sampling,
                # no beam search) is not allowed, as it would always produce
                # identical sequences
                model.generate(input_ids, do_sample=False, num_return_sequences=2)

            # num_return_sequences > 1, sample
            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_return_sequences=2))

            # check that language generation respects bad_words_ids
            # create one 1-token bad-word sequence and one 2-token bad-word sequence
            bad_words_ids = [self._generate_random_bad_tokens(1, model), self._generate_random_bad_tokens(2, model)]
            output_tokens = model.generate(
                input_ids, do_sample=True, bad_words_ids=bad_words_ids, num_return_sequences=2
            )
            # only count generated tokens
            generated_ids = output_tokens[:, input_ids.shape[-1] :]
            self.assertFalse(self._check_match_tokens(generated_ids.numpy().tolist(), bad_words_ids))

    def test_lm_head_model_random_beam_search_generate(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        input_ids = inputs_dict["input_ids"] if "input_ids" in inputs_dict else inputs_dict["inputs"]

        for model_class in self.all_generative_model_classes:
            model = model_class(config)

            if config.bos_token_id is None:
                # if bos token id is not defined, the model needs input_ids; num_return_sequences = 1
                self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2))
            else:
                # num_return_sequences = 1
                self._check_generated_ids(model.generate(do_sample=True, max_length=5, num_beams=2))

            with self.assertRaises(AssertionError):
                # generating more sequences than we have beams is not possible
                model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)

            # num_return_sequences > 1, sample
            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2, num_return_sequences=2,))
            # num_return_sequences > 1, greedy
            self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))

            # check that language generation respects bad_words_ids
            # create one 1-token bad-word sequence and one 2-token bad-word sequence
            bad_words_ids = [self._generate_random_bad_tokens(1, model), self._generate_random_bad_tokens(2, model)]
            output_tokens = model.generate(
                input_ids, do_sample=False, bad_words_ids=bad_words_ids, num_beams=2, num_return_sequences=2
            )
            # only count generated tokens
            generated_ids = output_tokens[:, input_ids.shape[-1] :]
            self.assertFalse(self._check_match_tokens(generated_ids.numpy().tolist(), bad_words_ids))

    def _generate_random_bad_tokens(self, num_bad_tokens, model):
        # special tokens cannot be bad tokens
        special_tokens = []
        if model.config.bos_token_id is not None:
            special_tokens.append(model.config.bos_token_id)
        if model.config.pad_token_id is not None:
            special_tokens.append(model.config.pad_token_id)
        if model.config.eos_token_id is not None:
            special_tokens.append(model.config.eos_token_id)

        # create random bad tokens that are not special tokens
        bad_tokens = []
        while len(bad_tokens) < num_bad_tokens:
            token = tf.squeeze(ids_tensor((1, 1), self.model_tester.vocab_size), 0).numpy()[0]
            if token not in special_tokens:
                bad_tokens.append(token)
        return bad_tokens
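        # Rejection sampling is cheap here: the special-token set is tiny
        # relative to vocab_size, so the loop above terminates almost immediately.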

    def _check_generated_ids(self, output_ids):
        for token_id in output_ids[0].numpy().tolist():
            self.assertGreaterEqual(token_id, 0)
            self.assertLess(token_id, self.model_tester.vocab_size)

    def _check_match_tokens(self, generated_ids, bad_words_ids):
        # for all bad word tokens
        for bad_word_ids in bad_words_ids:
            # for all slices in batch
            for generated_ids_slice in generated_ids:
                # for every window end position, including the final token
                for i in range(len(bad_word_ids), len(generated_ids_slice) + 1):
                    # if tokens match
                    if generated_ids_slice[i - len(bad_word_ids) : i] == bad_word_ids:
                        return True
        return False
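        # Worked example (illustrative values): with bad_words_ids = [[3, 4]] and
        # generated_ids = [[1, 2, 3, 4]], the window ending at the final position
        # equals [3, 4], so the method returns True.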


def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
    """Creates a random int32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = random.Random()

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    output = tf.constant(values, shape=shape, dtype=dtype if dtype is not None else tf.int32)

    return output
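
# Usage sketch (illustrative values): ids_tensor((2, 5), vocab_size=99) yields an
# int32 tensor of shape (2, 5) whose entries are drawn uniformly from [0, 99).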


@require_tf
class UtilsFunctionsTest(unittest.TestCase):

    # tests whether the top_k_top_p_filtering function behaves as expected
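    # In the call below, top_k=10 first keeps only the 10 highest logits per row;
    # top_p=0.6 then filters out tokens whose cumulative softmax probability
    # exceeds 0.6; min_tokens_to_keep=4 guarantees at least 4 survivors per row.
    # Every filtered position is set to -inf.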
    def test_top_k_top_p_filtering(self):
        logits = tf.convert_to_tensor(
            [
                [
                    8.2220991,  # 3rd highest value; idx. 0
                    -0.5620044,
                    5.23229752,
                    4.0386393,
                    -6.8798378,
                    -0.54785802,
                    -3.2012153,
                    2.92777176,
                    1.88171953,
                    7.35341276,  # 5th highest value; idx. 9
                    8.43207833,  # 2nd highest value; idx. 10
                    -9.85711836,
                    -5.96209236,
                    -1.13039161,
                    -7.1115294,
                    -0.8369633,
                    -5.3186408,
                    7.06427407,
                    0.81369344,
                    -0.82023817,
                    -5.9179796,
                    0.58813443,
                    -6.99778438,
                    4.71551189,
                    -0.18771637,
                    7.44020759,  # 4th highest value; idx. 25
                    9.38450987,  # 1st highest value; idx. 26
                    2.12662941,
                    -9.32562038,
                    2.35652522,
                ],  # cumulative prob of 5 highest values <= 0.6
                [
                    0.58425518,
                    4.53139238,
                    -5.57510464,
                    -6.28030699,
                    -7.19529503,
                    -4.02122551,
                    1.39337037,
                    -6.06707057,
                    1.59480517,
                    -9.643119,
                    0.03907799,
                    0.67231762,
                    -8.88206726,
                    6.27115922,  # 4th highest value; idx. 13
                    2.28520723,
                    4.82767506,
                    4.30421368,
                    8.8275313,  # 2nd highest value; idx. 17
                    5.44029958,  # 5th highest value; idx. 18
                    -4.4735794,
                    7.38579536,  # 3rd highest value; idx. 20
                    -2.91051663,
                    2.61946077,
                    -2.5674762,
                    -9.48959302,
                    -4.02922645,
                    -1.35416918,
                    9.67702323,  # 1st highest value; idx. 27
                    -5.89478553,
                    1.85370467,
                ],  # cumulative prob of 5 highest values <= 0.6
            ],
            dtype=tf.float32,
        )

        non_inf_expected_idx = tf.convert_to_tensor(
            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]], dtype=tf.int32,
        )  # expected non filtered idx as noted above

        non_inf_expected_output = tf.convert_to_tensor(
            [8.222099, 7.3534126, 8.432078, 7.4402075, 9.38451, 6.271159, 8.827531, 5.4402995, 7.3857956, 9.677023],
            dtype=tf.float32,
        )  # expected non filtered values as noted above

        output = tf_top_k_top_p_filtering(logits, top_k=10, top_p=0.6, min_tokens_to_keep=4)

        non_inf_output = output[output != -float("inf")]
        non_inf_idx = tf.cast(
            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))), dtype=tf.int32,
        )

        tf.debugging.assert_near(non_inf_output, non_inf_expected_output, rtol=1e-12)
        tf.debugging.assert_equal(non_inf_idx, non_inf_expected_idx)