"docs/source/ko/tasks/audio_classification.md" did not exist on "ffc319e7b8e9988505d010f47a637088d102fff2"
# coding=utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from transformers import RobertaConfig, is_tf_available
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow

from .test_configuration_common import ConfigTester
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor


# The TensorFlow-backed symbols are only imported when `is_tf_available()` is true.
if is_tf_available():
    import numpy
    import tensorflow as tf

    from transformers.models.roberta.modeling_tf_roberta import (
        TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
        TFRobertaForMaskedLM,
        TFRobertaForMultipleChoice,
        TFRobertaForQuestionAnswering,
        TFRobertaForSequenceClassification,
        TFRobertaForTokenClassification,
        TFRobertaModel,
    )


class TFRobertaModelTester:
    """Build a small RoBERTa config with random inputs and check TF model output shapes.

    All assertions are delegated to ``parent``, the object handed to the
    constructor (the test case that owns this tester).
    """

    def __init__(
        self,
        parent,
    ):
        """Store ``parent`` and the fixed hyper-parameters used by every check."""
        self.parent = parent

        # Input geometry and feature switches.
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True

        # Model hyper-parameters forwarded to `RobertaConfig`.
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.initializer_range = 0.02

        # Sizes of the label spaces used when drawing labels and checking heads.
        self.type_sequence_label_size = 2
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        """Return ``(config, input_ids, token_type_ids, input_mask, sequence_labels,
        token_labels, choice_labels)``.

        Inputs disabled through the ``use_*`` switches are returned as ``None``.
        """
        batch_by_seq = [self.batch_size, self.seq_length]

        input_ids = ids_tensor(batch_by_seq, self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            # vocab_size=2 is passed so the mask ids are drawn from a two-value vocabulary.
            input_mask = ids_tensor(batch_by_seq, vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor(batch_by_seq, self.type_vocab_size)

        sequence_labels = token_labels = choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor(batch_by_seq, self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = RobertaConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def create_and_check_roberta_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Run the base model with dict, list and single-tensor inputs and check the hidden-state shape."""
        model = TFRobertaModel(config=config)
        expected_shape = (self.batch_size, self.seq_length, self.hidden_size)

        call_variants = (
            {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids},
            [input_ids, input_mask],
            input_ids,
        )
        # Assert after every call so that no input form goes unchecked.
        for inputs in call_variants:
            result = model(inputs)
            self.parent.assertEqual(result.last_hidden_state.shape, expected_shape)

    def create_and_check_roberta_for_masked_lm(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that the masked-LM head yields logits of shape (batch, seq, vocab)."""
        model = TFRobertaForMaskedLM(config=config)
        result = model([input_ids, input_mask, token_type_ids])
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

    def create_and_check_roberta_for_token_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that the token-classification head yields logits of shape (batch, seq, num_labels)."""
        config.num_labels = self.num_labels
        model = TFRobertaForTokenClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

    def create_and_check_roberta_for_question_answering(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that the QA head yields start and end logits of shape (batch, seq)."""
        model = TFRobertaForQuestionAnswering(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)
        expected_shape = (self.batch_size, self.seq_length)
        self.parent.assertEqual(result.start_logits.shape, expected_shape)
        self.parent.assertEqual(result.end_logits.shape, expected_shape)

    def create_and_check_roberta_for_multiple_choice(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Check that the multiple-choice head yields logits of shape (batch, num_choices)."""
        config.num_choices = self.num_choices
        model = TFRobertaForMultipleChoice(config=config)

        def repeat_per_choice(tensor):
            # Insert a new axis at position 1 and repeat the tensor `num_choices` times along it.
            return tf.tile(tf.expand_dims(tensor, 1), (1, self.num_choices, 1))

        inputs = {
            "input_ids": repeat_per_choice(input_ids),
            "attention_mask": repeat_per_choice(input_mask),
            "token_type_ids": repeat_per_choice(token_type_ids),
        }
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))

    def prepare_config_and_inputs_for_common(self):
        """Return ``(config, inputs_dict)`` holding only the model inputs; the labels are dropped."""
        config, input_ids, token_type_ids, input_mask, *_ = self.prepare_config_and_inputs()
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict


@require_tf
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
    """Shape and configuration tests for the TF RoBERTa model classes.

    Each ``test_*`` method asks the model tester for a fresh config and inputs
    and forwards them to the matching ``create_and_check_*`` method.
    """

    # Left empty when TensorFlow is unavailable, since the model classes are
    # only imported under the `is_tf_available()` guard.
    all_model_classes = (
        (
            TFRobertaModel,
            TFRobertaForMaskedLM,
            TFRobertaForSequenceClassification,
            TFRobertaForTokenClassification,
            TFRobertaForQuestionAnswering,
        )
        if is_tf_available()
        else ()
    )
    test_head_masking = False
    test_onnx = False

    def setUp(self):
        self.model_tester = TFRobertaModelTester(self)
        self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_roberta_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_masked_lm(*config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_token_classification(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_question_answering(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_multiple_choice(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        # Only the first archive entry is loaded.
        for model_name in TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFRobertaModel.from_pretrained(model_name)
            self.assertIsNotNone(model)


@require_tf
@require_sentencepiece
@require_tokenizers
class TFRobertaModelIntegrationTest(unittest.TestCase):
    """Compare outputs of pretrained TF RoBERTa checkpoints against stored reference values."""

    # Token ids shared by every test below; stored as a plain list and turned
    # into a tensor inside each test.
    _INPUT_IDS = [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]

    def _assert_close(self, actual, expected):
        # Both arguments are tensors; compare with an absolute tolerance of 1e-4.
        self.assertTrue(numpy.allclose(actual.numpy(), expected.numpy(), atol=1e-4))

    @slow
    def test_inference_masked_lm(self):
        model = TFRobertaForMaskedLM.from_pretrained("roberta-base")

        input_ids = tf.constant(self._INPUT_IDS)
        output = model(input_ids)[0]
        expected_shape = [1, 11, 50265]
        self.assertEqual(list(output.numpy().shape), expected_shape)
        # compare the actual values for a slice.
        expected_slice = tf.constant(
            [[[33.8802, -4.3103, 22.7761], [4.6539, -2.8098, 13.6253], [1.8228, -3.6898, 8.8600]]]
        )
        self._assert_close(output[:, :3, :3], expected_slice)

    @slow
    def test_inference_no_head(self):
        model = TFRobertaModel.from_pretrained("roberta-base")

        input_ids = tf.constant(self._INPUT_IDS)
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = tf.constant(
            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0540, -0.0175], [0.0548, 0.0799, 0.1687]]]
        )
        self._assert_close(output[:, :3, :3], expected_slice)

    @slow
    def test_inference_classification_head(self):
        model = TFRobertaForSequenceClassification.from_pretrained("roberta-large-mnli")

        input_ids = tf.constant(self._INPUT_IDS)
        output = model(input_ids)[0]
        expected_shape = [1, 3]
        self.assertEqual(list(output.numpy().shape), expected_shape)
        expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
        self._assert_close(output, expected_tensor)