test_modeling_roberta.py 15.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Aymeric Augustin's avatar
Aymeric Augustin committed
15

16
17

import unittest
18

19
from transformers import is_torch_available
thomwolf's avatar
thomwolf committed
20

21
from .test_configuration_common import ConfigTester
22
from .test_modeling_common import ModelTesterMixin, ids_tensor
Aymeric Augustin's avatar
Aymeric Augustin committed
23
24
25
from .utils import CACHE_DIR, require_torch, slow, torch_device


26
if is_torch_available():
thomwolf's avatar
thomwolf committed
27
    import torch
28
29
30
31
32
33
34
    from transformers import (
        RobertaConfig,
        RobertaModel,
        RobertaForMaskedLM,
        RobertaForSequenceClassification,
        RobertaForTokenClassification,
    )
Lysandre's avatar
Lysandre committed
35
    from transformers.modeling_roberta import RobertaEmbeddings, RobertaForMultipleChoice, RobertaForQuestionAnswering
36
    from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
37
38


39
@require_torch
40
class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
41

thomwolf's avatar
thomwolf committed
42
    all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else ()
43
44

    class RobertaModelTester(object):
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = RobertaConfig(
thomwolf's avatar
thomwolf committed
113
                vocab_size=self.vocab_size,
114
115
116
117
118
119
120
121
122
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
123
124
                initializer_range=self.initializer_range,
            )
125
126
127
128

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
129
            self.parent.assertListEqual(list(result["loss"].size()), [])
130

131
132
133
        def create_and_check_roberta_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
134
            model = RobertaModel(config=config)
135
            model.to(torch_device)
136
            model.eval()
thomwolf's avatar
thomwolf committed
137
138
            sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
139
140
141
142
143
144
145
            sequence_output, pooled_output = model(input_ids)

            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
146
147
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
148
149
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

150
151
152
        def create_and_check_roberta_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
153
            model = RobertaForMaskedLM(config=config)
154
            model.to(torch_device)
155
            model.eval()
156
157
158
            loss, prediction_scores = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
            )
159
160
161
162
163
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
164
165
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
166
167
            self.check_loss_output(result)

168
169
170
        def create_and_check_roberta_for_token_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
Matt Maybeno's avatar
Matt Maybeno committed
171
172
            config.num_labels = self.num_labels
            model = RobertaForTokenClassification(config=config)
173
            model.to(torch_device)
Matt Maybeno's avatar
Matt Maybeno committed
174
            model.eval()
175
176
177
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
            )
Matt Maybeno's avatar
Matt Maybeno committed
178
179
180
181
182
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(
183
184
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
            )
Matt Maybeno's avatar
Matt Maybeno committed
185
186
            self.check_loss_output(result)

Lysandre's avatar
Lysandre committed
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
        def create_and_check_roberta_for_multiple_choice(
                self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_choices = self.num_choices
            model = RobertaForMultipleChoice(config=config)
            model.to(torch_device)
            model.eval()
            multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            loss, logits = model(
                multiple_choice_inputs_ids,
                attention_mask=multiple_choice_input_mask,
                token_type_ids=multiple_choice_token_type_ids,
                labels=choice_labels,
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices])
            self.check_loss_output(result)

        def create_and_check_roberta_for_question_answering(
                self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = RobertaForQuestionAnswering(config=config)
            model.to(torch_device)
            model.eval()
            loss, start_logits, end_logits = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
            )
            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
            self.check_loss_output(result)

232
233
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
234
235
236
237
238
239
240
241
242
243
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
            return config, inputs_dict

    def setUp(self):
        self.model_tester = RobertaModelTest.RobertaModelTester(self)
        self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_roberta_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_masked_lm(*config_and_inputs)

Lysandre's avatar
Lysandre committed
261
262
263
264
265
266
267
268
269
270
271
272
    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_token_classification(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_multiple_choice(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_question_answering(*config_and_inputs)

273
    @slow
274
275
    def test_model_from_pretrained(self):
        for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
276
            model = RobertaModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
277
278
            self.assertIsNotNone(model)

Dom Hudson's avatar
Dom Hudson committed
279
280
281
282
283
284
285
286
287
288
289
    def test_create_position_ids_respects_padding_index(self):
        """ Ensure that the default position ids only assign a sequential . This is a regression
        test for https://github.com/huggingface/transformers/issues/1761

        The position ids should be masked with the embedding object's padding index. Therefore, the
        first available non-padding position index is RobertaEmbeddings.padding_idx + 1
        """
        config = self.model_tester.prepare_config_and_inputs()[0]
        model = RobertaEmbeddings(config=config)

        input_ids = torch.as_tensor([[12, 31, 13, model.padding_idx]])
290
291
292
        expected_positions = torch.as_tensor(
            [[0 + model.padding_idx + 1, 1 + model.padding_idx + 1, 2 + model.padding_idx + 1, model.padding_idx]]
        )
Dom Hudson's avatar
Dom Hudson committed
293
294

        position_ids = model.create_position_ids_from_input_ids(input_ids)
295
        self.assertEqual(position_ids.shape, expected_positions.shape)
Dom Hudson's avatar
Dom Hudson committed
296
297
298
299
300
301
302
303
304
305
        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))

    def test_create_position_ids_from_inputs_embeds(self):
        """ Ensure that the default position ids only assign a sequential . This is a regression
        test for https://github.com/huggingface/transformers/issues/1761

        The position ids should be masked with the embedding object's padding index. Therefore, the
        first available non-padding position index is RobertaEmbeddings.padding_idx + 1
        """
        config = self.model_tester.prepare_config_and_inputs()[0]
306
307
308
309
310
311
312
313
314
315
316
        embeddings = RobertaEmbeddings(config=config)

        inputs_embeds = torch.Tensor(2, 4, 30)
        expected_single_positions = [
            0 + embeddings.padding_idx + 1,
            1 + embeddings.padding_idx + 1,
            2 + embeddings.padding_idx + 1,
            3 + embeddings.padding_idx + 1,
        ]
        expected_positions = torch.as_tensor([expected_single_positions, expected_single_positions])
        position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)
317
318
        self.assertEqual(position_ids.shape, expected_positions.shape)
        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
319
320
321


class RobertaModelIntegrationTest(unittest.TestCase):
322
    @slow
323
    def test_inference_masked_lm(self):
324
        model = RobertaForMaskedLM.from_pretrained("roberta-base")
325

326
        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
327
328
        output = model(input_ids)[0]
        expected_shape = torch.Size((1, 11, 50265))
329
        self.assertEqual(output.shape, expected_shape)
330
331
        # compare the actual values for a slice.
        expected_slice = torch.Tensor(
332
            [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
333
        )
334
        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
335

336
    @slow
337
    def test_inference_no_head(self):
338
        model = RobertaModel.from_pretrained("roberta-base")
339

340
        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
341
342
343
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = torch.Tensor(
344
            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
345
        )
346
        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
347

348
    @slow
349
    def test_inference_classification_head(self):
350
        model = RobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
351

352
        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
353
354
        output = model(input_ids)[0]
        expected_shape = torch.Size((1, 3))
355
356
357
        self.assertEqual(output.shape, expected_shape)
        expected_tensor = torch.Tensor([[-0.9469, 0.3913, 0.5118]])
        self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-3))