# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import unittest

from transformers import AutoTokenizer, MBartConfig, is_tf_available
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
from transformers.utils import cached_property

from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin


if is_tf_available():
    import tensorflow as tf

    from transformers import TFAutoModelForSeq2SeqLM, TFMBartForConditionalGeneration, TFMBartModel


35
36
@require_tf
class TFMBartModelTester:
    """Build a small MBart configuration and matching inputs for the TF model tests.

    The constructor only records its arguments on the instance; the config and
    tensors are created on demand by `prepare_config_and_inputs_for_common`.
    """

    # Config class instantiated by `prepare_config_and_inputs_for_common`.
    config_cls = MBartConfig
    # Extra keyword arguments splatted into the config constructor after the explicit ones.
    config_updates = {}
    # Not read by any method of this class.
    hidden_act = "gelu"

    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_labels=False,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=20,
        eos_token_id=2,
        pad_token_id=1,
        bos_token_id=0,
    ):
        """Store the owning test case (`parent`) and the model/input hyper-parameters."""
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id

    def prepare_config_and_inputs_for_common(self):
        """Return `(config, inputs_dict)` built from the stored hyper-parameters.

        Encoder ids are `seq_length - 1` columns from `ids_tensor` followed by one
        column of `eos_token_id`; decoder ids are `seq_length` columns from
        `ids_tensor`. Masks are filled in by `prepare_mbart_inputs_dict`.
        """
        input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
        # Every encoder row ends with the EOS token.
        eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
        input_ids = tf.concat([input_ids, eos_tensor], axis=1)

        decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        config = self.config_cls(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            encoder_layers=self.num_hidden_layers,
            decoder_layers=self.num_hidden_layers,
            encoder_attention_heads=self.num_attention_heads,
            decoder_attention_heads=self.num_attention_heads,
            encoder_ffn_dim=self.intermediate_size,
            decoder_ffn_dim=self.intermediate_size,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            # Use the configured EOS id (same one appended to `input_ids` above) instead of a
            # hard-coded `eos_token_ids=[2]`, which ignored the constructor argument.
            eos_token_id=self.eos_token_id,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
            decoder_start_token_id=self.pad_token_id,
            **self.config_updates,
        )
        inputs_dict = prepare_mbart_inputs_dict(config, input_ids, decoder_input_ids)
        return config, inputs_dict

    def check_decoder_model_past_large_inputs(self, config, inputs_dict):
        """Run one cached forward pass of the decoder on the first example of the batch.

        No explicit assertion is made; the check fails only if the forward pass,
        the two-element unpacking of its outputs, or the cache indexing raises.
        Side effect: sets `self.batch_size` to 1.
        """
        model = TFMBartModel(config=config).get_decoder()
        input_ids = inputs_dict["input_ids"]

        # Keep only the first row of the batch.
        input_ids = input_ids[:1, :]
        attention_mask = inputs_dict["attention_mask"][:1, :]
        head_mask = inputs_dict["head_mask"]
        self.batch_size = 1

        # first forward pass
        outputs = model(input_ids, attention_mask=attention_mask, head_mask=head_mask, use_cache=True)

        # Unpacking requires exactly two outputs; indexing requires at least two cache entries.
        output, past_key_values = outputs.to_tuple()
        past_key_values = past_key_values[1]
119

120
121
122
123
124
125
126

def prepare_mbart_inputs_dict(
    config,
    input_ids,
    decoder_input_ids,
    attention_mask=None,
    decoder_attention_mask=None,
    head_mask=None,
    decoder_head_mask=None,
    cross_attn_head_mask=None,
):
    """Assemble the keyword inputs for an MBart model, filling in any mask left as `None`.

    Defaults computed here:
      * `attention_mask`: int8, 1 where `input_ids` differs from `config.pad_token_id`.
      * `decoder_attention_mask`: int8, first column all ones, remaining columns 1 where
        `decoder_input_ids` differs from `config.pad_token_id`.
      * `head_mask`: ones of shape `(config.encoder_layers, config.encoder_attention_heads)`.
      * `decoder_head_mask` and `cross_attn_head_mask`: ones of shape
        `(config.decoder_layers, config.decoder_attention_heads)`.

    `config` is only read when at least one mask has to be computed.

    Returns:
        A dict with the keys `input_ids`, `decoder_input_ids`, `attention_mask`,
        `decoder_attention_mask`, `head_mask`, `decoder_head_mask` and `cross_attn_head_mask`.
    """
    if attention_mask is None:
        attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int8)
    if decoder_attention_mask is None:
        # The first decoder position is always attended to, whatever token it holds.
        decoder_attention_mask = tf.concat(
            [
                tf.ones(decoder_input_ids[:, :1].shape, dtype=tf.int8),
                tf.cast(tf.math.not_equal(decoder_input_ids[:, 1:], config.pad_token_id), tf.int8),
            ],
            axis=-1,
        )
    if head_mask is None:
        head_mask = tf.ones((config.encoder_layers, config.encoder_attention_heads))
    if decoder_head_mask is None:
        decoder_head_mask = tf.ones((config.decoder_layers, config.decoder_attention_heads))
    if cross_attn_head_mask is None:
        cross_attn_head_mask = tf.ones((config.decoder_layers, config.decoder_attention_heads))
    return {
        "input_ids": input_ids,
        "decoder_input_ids": decoder_input_ids,
        "attention_mask": attention_mask,
        "decoder_attention_mask": decoder_attention_mask,
        "head_mask": head_mask,
        "decoder_head_mask": decoder_head_mask,
        "cross_attn_head_mask": cross_attn_head_mask,
    }


@require_tf
class TFMBartModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Common-mixin test case for the TF MBart models, driven by `TFMBartModelTester`."""

    # The model classes are only importable when TensorFlow is available, hence the guards.
    all_model_classes = (TFMBartForConditionalGeneration, TFMBartModel) if is_tf_available() else ()
    all_generative_model_classes = (TFMBartForConditionalGeneration,) if is_tf_available() else ()
    pipeline_model_mapping = (
        {
            "feature-extraction": TFMBartModel,
            "summarization": TFMBartForConditionalGeneration,
            "text2text-generation": TFMBartForConditionalGeneration,
            "translation": TFMBartForConditionalGeneration,
        }
        if is_tf_available()
        else {}
    )
    # Flags presumably consumed by the shared mixins -- not read anywhere in this class.
    is_encoder_decoder = True
    test_pruning = False
    test_onnx = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Return True for every pipeline test case except `FeatureExtractionPipelineTests`.

        Only `pipeline_test_casse_name` is inspected; the other arguments are ignored.
        The parameter spelling is left as-is because it is part of the method's signature.
        """
        # Exception encountered when calling layer '...'
        return pipeline_test_casse_name != "FeatureExtractionPipelineTests"

    def setUp(self):
        """Create the model tester and the config tester used by the tests below."""
        self.model_tester = TFMBartModelTester(self)
        self.config_tester = ConfigTester(self, config_class=MBartConfig)

    def test_config(self):
        """Run the shared configuration checks for `MBartConfig`."""
        self.config_tester.run_common_tests()

    def test_decoder_model_past_large_inputs(self):
        """Exercise the decoder's cached forward pass on freshly prepared inputs."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
        self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)

197
198
199

@require_sentencepiece
@require_tokenizers
@require_tf
class TFMBartModelIntegrationTest(unittest.TestCase):
    """Translation check against the pretrained `facebook/mbart-large-en-ro` checkpoint."""

    src_text = [
        " UN Chief Says There Is No Military Solution in Syria",
    ]
    # Romanian reference translation. Must be stored as real Unicode text: the previous
    # literal was the UTF-8 bytes of this sentence mis-decoded as GBK, so the equality
    # assertion below could never match the decoded model output.
    expected_text = [
        "Şeful ONU declară că nu există o soluţie militară în Siria",
    ]
    model_name = "facebook/mbart-large-en-ro"

    @cached_property
    def tokenizer(self):
        """Tokenizer loaded from `model_name`."""
        return AutoTokenizer.from_pretrained(self.model_name)

    @cached_property
    def model(self):
        """Seq2seq LM loaded from `model_name`."""
        return TFAutoModelForSeq2SeqLM.from_pretrained(self.model_name)

    def _assert_generated_batch_equal_expected(self, **tokenizer_kwargs):
        """Translate `src_text` and assert the result equals `expected_text`."""
        generated_words = self.translate_src_text(**tokenizer_kwargs)
        self.assertListEqual(self.expected_text, generated_words)

    def translate_src_text(self, **tokenizer_kwargs):
        """Tokenize `src_text`, generate with two beams, and return the decoded strings.

        `tokenizer_kwargs` are forwarded to the tokenizer call; special tokens are
        stripped when decoding.
        """
        model_inputs = self.tokenizer(self.src_text, **tokenizer_kwargs, return_tensors="tf")
        generated_ids = self.model.generate(
            model_inputs.input_ids, attention_mask=model_inputs.attention_mask, num_beams=2
        )
        generated_words = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        return generated_words

    @slow
    def test_batch_generation_en_ro(self):
        """Check the English-to-Romanian translation of `src_text`."""
        self._assert_generated_batch_equal_expected()