# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Auto Model class. """


import warnings
from collections import OrderedDict

from .configuration_auto import (
    AlbertConfig,
    AutoConfig,
    BartConfig,
    BertConfig,
    CamembertConfig,
    CTRLConfig,
    DistilBertConfig,
    ElectraConfig,
    EncoderDecoderConfig,
    FlaubertConfig,
    GPT2Config,
    LongformerConfig,
    MBartConfig,
    MobileBertConfig,
    OpenAIGPTConfig,
    PegasusConfig,
    ReformerConfig,
    RetriBertConfig,
    RobertaConfig,
    T5Config,
    TransfoXLConfig,
    XLMConfig,
    XLMRobertaConfig,
    XLNetConfig,
)
from .configuration_marian import MarianConfig
from .configuration_utils import PretrainedConfig
from .modeling_albert import (
    AlbertForMaskedLM,
    AlbertForMultipleChoice,
    AlbertForPreTraining,
    AlbertForQuestionAnswering,
    AlbertForSequenceClassification,
    AlbertForTokenClassification,
    AlbertModel,
)
from .modeling_bart import (
    BartForConditionalGeneration,
    BartForQuestionAnswering,
    BartForSequenceClassification,
    BartModel,
)
from .modeling_bert import (
    BertForMaskedLM,
    BertForMultipleChoice,
    BertForPreTraining,
    BertForQuestionAnswering,
    BertForSequenceClassification,
    BertForTokenClassification,
    BertLMHeadModel,
    BertModel,
)
from .modeling_camembert import (
    CamembertForCausalLM,
    CamembertForMaskedLM,
    CamembertForMultipleChoice,
    CamembertForQuestionAnswering,
    CamembertForSequenceClassification,
    CamembertForTokenClassification,
    CamembertModel,
)
from .modeling_ctrl import CTRLLMHeadModel, CTRLModel
from .modeling_distilbert import (
    DistilBertForMaskedLM,
    DistilBertForMultipleChoice,
    DistilBertForQuestionAnswering,
    DistilBertForSequenceClassification,
    DistilBertForTokenClassification,
    DistilBertModel,
)
from .modeling_electra import (
    ElectraForMaskedLM,
    ElectraForMultipleChoice,
    ElectraForPreTraining,
    ElectraForQuestionAnswering,
    ElectraForSequenceClassification,
    ElectraForTokenClassification,
    ElectraModel,
)
from .modeling_encoder_decoder import EncoderDecoderModel
from .modeling_flaubert import (
    FlaubertForMultipleChoice,
    FlaubertForQuestionAnsweringSimple,
    FlaubertForSequenceClassification,
    FlaubertForTokenClassification,
    FlaubertModel,
    FlaubertWithLMHeadModel,
)
from .modeling_gpt2 import GPT2LMHeadModel, GPT2Model
from .modeling_longformer import (
    LongformerForMaskedLM,
    LongformerForMultipleChoice,
    LongformerForQuestionAnswering,
    LongformerForSequenceClassification,
    LongformerForTokenClassification,
    LongformerModel,
)
from .modeling_marian import MarianMTModel
from .modeling_mbart import MBartForConditionalGeneration
from .modeling_mobilebert import (
    MobileBertForMaskedLM,
    MobileBertForMultipleChoice,
    MobileBertForPreTraining,
    MobileBertForQuestionAnswering,
    MobileBertForSequenceClassification,
    MobileBertForTokenClassification,
    MobileBertModel,
)
from .modeling_openai import OpenAIGPTLMHeadModel, OpenAIGPTModel
from .modeling_pegasus import PegasusForConditionalGeneration
from .modeling_reformer import (
    ReformerForMaskedLM,
    ReformerForQuestionAnswering,
    ReformerModel,
    ReformerModelWithLMHead,
)
from .modeling_retribert import RetriBertModel
from .modeling_roberta import (
    RobertaForCausalLM,
    RobertaForMaskedLM,
    RobertaForMultipleChoice,
    RobertaForQuestionAnswering,
    RobertaForSequenceClassification,
    RobertaForTokenClassification,
    RobertaModel,
)
from .modeling_t5 import T5ForConditionalGeneration, T5Model
from .modeling_transfo_xl import TransfoXLLMHeadModel, TransfoXLModel
from .modeling_xlm import (
    XLMForMultipleChoice,
    XLMForQuestionAnsweringSimple,
    XLMForSequenceClassification,
    XLMForTokenClassification,
    XLMModel,
    XLMWithLMHeadModel,
)
from .modeling_xlm_roberta import (
    XLMRobertaForMaskedLM,
    XLMRobertaForMultipleChoice,
    XLMRobertaForQuestionAnswering,
    XLMRobertaForSequenceClassification,
    XLMRobertaForTokenClassification,
    XLMRobertaModel,
)
from .modeling_xlnet import (
    XLNetForMultipleChoice,
    XLNetForQuestionAnsweringSimple,
    XLNetForSequenceClassification,
    XLNetForTokenClassification,
    XLNetLMHeadModel,
    XLNetModel,
)
from .utils import logging


logger = logging.get_logger(__name__)


MODEL_MAPPING = OrderedDict(
    [
        (RetriBertConfig, RetriBertModel),
        (T5Config, T5Model),
        (DistilBertConfig, DistilBertModel),
        (AlbertConfig, AlbertModel),
        (CamembertConfig, CamembertModel),
        (XLMRobertaConfig, XLMRobertaModel),
        (BartConfig, BartModel),
        (LongformerConfig, LongformerModel),
        (RobertaConfig, RobertaModel),
        (BertConfig, BertModel),
        (OpenAIGPTConfig, OpenAIGPTModel),
        (GPT2Config, GPT2Model),
        (MobileBertConfig, MobileBertModel),
        (TransfoXLConfig, TransfoXLModel),
        (XLNetConfig, XLNetModel),
        (FlaubertConfig, FlaubertModel),
        (XLMConfig, XLMModel),
        (CTRLConfig, CTRLModel),
        (ElectraConfig, ElectraModel),
        (ReformerConfig, ReformerModel),
    ]
)
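
# Ordering note: the Auto classes below pick a model class by iterating over a mapping
# and returning the first ``isinstance`` match on the configuration. Because some
# configuration classes subclass others (e.g. ``CamembertConfig`` derives from
# ``RobertaConfig``, which derives from ``BertConfig``), subclasses must be listed
# before their parents, here and in the mappings that follow.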

MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
    [
        (RetriBertConfig, RetriBertModel),
        (T5Config, T5ForConditionalGeneration),
        (DistilBertConfig, DistilBertForMaskedLM),
        (AlbertConfig, AlbertForPreTraining),
        (CamembertConfig, CamembertForMaskedLM),
        (XLMRobertaConfig, XLMRobertaForMaskedLM),
        (BartConfig, BartForConditionalGeneration),
        (LongformerConfig, LongformerForMaskedLM),
        (RobertaConfig, RobertaForMaskedLM),
        (BertConfig, BertForPreTraining),
        (OpenAIGPTConfig, OpenAIGPTLMHeadModel),
        (GPT2Config, GPT2LMHeadModel),
        (MobileBertConfig, MobileBertForPreTraining),
        (TransfoXLConfig, TransfoXLLMHeadModel),
        (XLNetConfig, XLNetLMHeadModel),
        (FlaubertConfig, FlaubertWithLMHeadModel),
        (XLMConfig, XLMWithLMHeadModel),
        (CTRLConfig, CTRLLMHeadModel),
        (ElectraConfig, ElectraForPreTraining),
    ]
)

MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
    [
        (T5Config, T5ForConditionalGeneration),
        (DistilBertConfig, DistilBertForMaskedLM),
        (AlbertConfig, AlbertForMaskedLM),
        (CamembertConfig, CamembertForMaskedLM),
        (XLMRobertaConfig, XLMRobertaForMaskedLM),
        (MarianConfig, MarianMTModel),
        (BartConfig, BartForConditionalGeneration),
        (LongformerConfig, LongformerForMaskedLM),
        (RobertaConfig, RobertaForMaskedLM),
        (BertConfig, BertForMaskedLM),
        (OpenAIGPTConfig, OpenAIGPTLMHeadModel),
        (GPT2Config, GPT2LMHeadModel),
        (MobileBertConfig, MobileBertForMaskedLM),
        (TransfoXLConfig, TransfoXLLMHeadModel),
        (XLNetConfig, XLNetLMHeadModel),
        (FlaubertConfig, FlaubertWithLMHeadModel),
        (XLMConfig, XLMWithLMHeadModel),
        (CTRLConfig, CTRLLMHeadModel),
        (ElectraConfig, ElectraForMaskedLM),
        (EncoderDecoderConfig, EncoderDecoderModel),
        (ReformerConfig, ReformerModelWithLMHead),
    ]
)
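
# Legacy mapping backing the deprecated ``AutoModelWithLMHead`` class below: it mixes
# causal, masked and seq2seq LM heads. Its deprecation warning points to
# ``AutoModelForCausalLM``, ``AutoModelForMaskedLM`` and ``AutoModelForSeq2SeqLM``
# instead, presumably each backed by one of the task-specific mappings that follow.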

MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
    [
        (CamembertConfig, CamembertForCausalLM),
        (RobertaConfig, RobertaForCausalLM),
        (BertConfig, BertLMHeadModel),
        (OpenAIGPTConfig, OpenAIGPTLMHeadModel),
        (GPT2Config, GPT2LMHeadModel),
        (TransfoXLConfig, TransfoXLLMHeadModel),
        (XLNetConfig, XLNetLMHeadModel),
        (
            XLMConfig,
            XLMWithLMHeadModel,
        ),  # XLM can be MLM and CLM => model should be split similar to BERT; leave here for now
        (CTRLConfig, CTRLLMHeadModel),
        (ReformerConfig, ReformerModelWithLMHead),
    ]
)

MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
    [
        (DistilBertConfig, DistilBertForMaskedLM),
        (AlbertConfig, AlbertForMaskedLM),
        (BartConfig, BartForConditionalGeneration),
        (CamembertConfig, CamembertForMaskedLM),
        (XLMRobertaConfig, XLMRobertaForMaskedLM),
        (LongformerConfig, LongformerForMaskedLM),
        (RobertaConfig, RobertaForMaskedLM),
        (BertConfig, BertForMaskedLM),
        (MobileBertConfig, MobileBertForMaskedLM),
        (FlaubertConfig, FlaubertWithLMHeadModel),
        (XLMConfig, XLMWithLMHeadModel),
        (ElectraConfig, ElectraForMaskedLM),
        (ReformerConfig, ReformerForMaskedLM),
    ]
)

MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(
    [
        (T5Config, T5ForConditionalGeneration),
        (PegasusConfig, PegasusForConditionalGeneration),
        (MarianConfig, MarianMTModel),
        (MBartConfig, MBartForConditionalGeneration),
        (BartConfig, BartForConditionalGeneration),
        (EncoderDecoderConfig, EncoderDecoderModel),
    ]
)
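
# Dispatch example (illustrative): a checkpoint whose configuration is a ``MarianConfig``,
# e.g. 'Helsinki-NLP/opus-mt-en-de', resolves to ``MarianMTModel`` through this mapping.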

MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
    [
        (DistilBertConfig, DistilBertForSequenceClassification),
        (AlbertConfig, AlbertForSequenceClassification),
        (CamembertConfig, CamembertForSequenceClassification),
        (XLMRobertaConfig, XLMRobertaForSequenceClassification),
        (BartConfig, BartForSequenceClassification),
        (LongformerConfig, LongformerForSequenceClassification),
        (RobertaConfig, RobertaForSequenceClassification),
        (BertConfig, BertForSequenceClassification),
        (XLNetConfig, XLNetForSequenceClassification),
        (MobileBertConfig, MobileBertForSequenceClassification),
        (FlaubertConfig, FlaubertForSequenceClassification),
        (XLMConfig, XLMForSequenceClassification),
        (ElectraConfig, ElectraForSequenceClassification),
    ]
)

MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
    [
        (DistilBertConfig, DistilBertForQuestionAnswering),
        (AlbertConfig, AlbertForQuestionAnswering),
        (CamembertConfig, CamembertForQuestionAnswering),
        (BartConfig, BartForQuestionAnswering),
        (LongformerConfig, LongformerForQuestionAnswering),
        (XLMRobertaConfig, XLMRobertaForQuestionAnswering),
        (RobertaConfig, RobertaForQuestionAnswering),
        (BertConfig, BertForQuestionAnswering),
        (XLNetConfig, XLNetForQuestionAnsweringSimple),
        (FlaubertConfig, FlaubertForQuestionAnsweringSimple),
        (MobileBertConfig, MobileBertForQuestionAnswering),
        (XLMConfig, XLMForQuestionAnsweringSimple),
        (ElectraConfig, ElectraForQuestionAnswering),
        (ReformerConfig, ReformerForQuestionAnswering),
    ]
)

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
    [
        (DistilBertConfig, DistilBertForTokenClassification),
        (CamembertConfig, CamembertForTokenClassification),
        (FlaubertConfig, FlaubertForTokenClassification),
        (XLMConfig, XLMForTokenClassification),
        (XLMRobertaConfig, XLMRobertaForTokenClassification),
        (LongformerConfig, LongformerForTokenClassification),
        (RobertaConfig, RobertaForTokenClassification),
        (BertConfig, BertForTokenClassification),
        (MobileBertConfig, MobileBertForTokenClassification),
        (XLNetConfig, XLNetForTokenClassification),
        (AlbertConfig, AlbertForTokenClassification),
        (ElectraConfig, ElectraForTokenClassification),
    ]
)

MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
    [
        (CamembertConfig, CamembertForMultipleChoice),
        (ElectraConfig, ElectraForMultipleChoice),
        (XLMRobertaConfig, XLMRobertaForMultipleChoice),
        (LongformerConfig, LongformerForMultipleChoice),
        (RobertaConfig, RobertaForMultipleChoice),
        (BertConfig, BertForMultipleChoice),
        (DistilBertConfig, DistilBertForMultipleChoice),
        (MobileBertConfig, MobileBertForMultipleChoice),
        (XLNetConfig, XLNetForMultipleChoice),
        (AlbertConfig, AlbertForMultipleChoice),
        (XLMConfig, XLMForMultipleChoice),
        (FlaubertConfig, FlaubertForMultipleChoice),
    ]
)
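
# Usage sketch (illustrative): the Auto classes below resolve a checkpoint to a concrete
# model class through the mappings above, e.g.
#
#     AutoModel.from_pretrained('bert-base-uncased')    # -> BertModel
#     AutoModelForCausalLM.from_pretrained('gpt2')      # -> GPT2LMHeadModel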


class AutoModel:
    r"""
    :class:`~transformers.AutoModel` is a generic model class
    that will be instantiated as one of the base model classes of the library
    when created with the `AutoModel.from_pretrained(pretrained_model_name_or_path)`
    or the `AutoModel.from_config(config)` class methods.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModel is designed to be instantiated "
            "using the `AutoModel.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModel.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the base model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModel.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertModel` (DistilBERT model)
                - isInstance of `longformer` configuration class: :class:`~transformers.LongformerModel` (Longformer model)
                - isInstance of `roberta` configuration class: :class:`~transformers.RobertaModel` (RoBERTa model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertModel` (Bert model)
                - isInstance of `openai-gpt` configuration class: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model)
                - isInstance of `gpt2` configuration class: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model)
                - isInstance of `ctrl` configuration class: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
                - isInstance of `transfo-xl` configuration class: :class:`~transformers.TransfoXLModel` (Transformer-XL model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetModel` (XLNet model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMModel` (XLM model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertModel` (Flaubert model)
                - isInstance of `electra` configuration class: :class:`~transformers.ElectraModel` (Electra model)

        Examples::

            >>> config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            >>> model = AutoModel.from_config(config)  # Instantiate an (untrained) model from the configuration
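            >>> # The configuration can equally be resolved with AutoConfig (illustrative):
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModel.from_config(config)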
        """
        for config_class, model_class in MODEL_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_MAPPING.keys())
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the base model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `t5`: :class:`~transformers.T5Model` (T5 model)
            - `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model)
            - `albert`: :class:`~transformers.AlbertModel` (ALBERT model)
            - `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model)
            - `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model)
            - `longformer`: :class:`~transformers.LongformerModel` (Longformer model)
            - `roberta`: :class:`~transformers.RobertaModel` (RoBERTa model)
            - `bert`: :class:`~transformers.BertModel` (Bert model)
            - `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model)
            - `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model)
            - `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model)
            - `xlnet`: :class:`~transformers.XLNetModel` (XLNet model)
            - `xlm`: :class:`~transformers.XLMModel` (XLM model)
            - `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
            - `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model)
            - `electra`: :class:`~transformers.ElectraModel` (Electra model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path: either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.

            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method.

            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.

            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.

            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.

            resume_download: (`optional`) boolean, default False:
                Do not delete incompletely received file. Attempt to resume the download if such a file exists.

            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.

            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.

            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModel.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModel.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_MAPPING.keys())
            )
        )


class AutoModelForPreTraining:
    r"""
    :class:`~transformers.AutoModelForPreTraining` is a generic model class
    that will be instantiated as one of the model classes of the library (with the architecture used for pretraining this model) when created with the `AutoModelForPreTraining.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForPreTraining is designed to be instantiated "
            "using the `AutoModelForPreTraining.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForPreTraining.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the model classes of the library (with the architecture used for pretraining this model)
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForPreTraining.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
                - isInstance of `longformer` configuration class: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
                - isInstance of `roberta` configuration class: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertForPreTraining` (Bert model)
                - isInstance of `openai-gpt` configuration class: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
                - isInstance of `gpt2` configuration class: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
                - isInstance of `ctrl` configuration class: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
                - isInstance of `transfo-xl` configuration class: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
                - isInstance of `electra` configuration class: :class:`~transformers.ElectraForPreTraining` (Electra model)

        Examples::

            >>> config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            >>> model = AutoModelForPreTraining.from_config(config)  # Instantiate an (untrained) model from the configuration
        """
        for config_class, model_class in MODEL_FOR_PRETRAINING_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_PRETRAINING_MAPPING.keys())
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the model classes of the library (with the architecture used for pretraining this model) from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `t5`: :class:`~transformers.T5ForConditionalGeneration` (T5 model)
            - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
            - `albert`: :class:`~transformers.AlbertForPreTraining` (ALBERT model)
            - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
            - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
            - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
            - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
            - `bert`: :class:`~transformers.BertForPreTraining` (Bert model)
            - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
            - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
            - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
            - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
            - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
            - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
            - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
            - `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path:
                Either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method.
            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.
            resume_download: (`optional`) boolean, default False:
                Do not delete incompletely received file. Attempt to resume the download if such a file exists.
            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.
            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForPreTraining.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModelForPreTraining.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            model = AutoModelForPreTraining.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModelForPreTraining.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_PRETRAINING_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_PRETRAINING_MAPPING.keys())
            )
        )


class AutoModelWithLMHead:
    r"""
    :class:`~transformers.AutoModelWithLMHead` is a generic model class
    that will be instantiated as one of the language modeling model classes of the library
    when created with the `AutoModelWithLMHead.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelWithLMHead is designed to be instantiated "
            "using the `AutoModelWithLMHead.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelWithLMHead.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the language modeling model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelWithLMHead.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
                - isInstance of `longformer` configuration class: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
                - isInstance of `roberta` configuration class: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertForMaskedLM` (Bert model)
                - isInstance of `openai-gpt` configuration class: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
                - isInstance of `gpt2` configuration class: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
                - isInstance of `ctrl` configuration class: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
                - isInstance of `transfo-xl` configuration class: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
                - isInstance of `electra` configuration class: :class:`~transformers.ElectraForMaskedLM` (Electra model)

        Examples::

            config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            model = AutoModelWithLMHead.from_config(config)  # Instantiate an (untrained) model from the configuration
        """
        warnings.warn(
            "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.",
            FutureWarning,
        )
        for config_class, model_class in MODEL_WITH_LM_HEAD_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_WITH_LM_HEAD_MAPPING.keys())
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the language modeling model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `t5`: :class:`~transformers.T5ForConditionalGeneration` (T5 model)
            - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
            - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
            - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
            - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
            - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
            - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
            - `bert`: :class:`~transformers.BertForMaskedLM` (Bert model)
            - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
            - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
            - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
            - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
            - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
            - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
            - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
            - `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path:
                Either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method.
            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.
            resume_download: (`optional`) boolean, default False:
                Do not delete incompletely received file. Attempt to resume the download if such a file exists.
            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.
            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelWithLMHead.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModelWithLMHead.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            model = AutoModelWithLMHead.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModelWithLMHead.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

        """
        warnings.warn(
            "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.",
            FutureWarning,
        )
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_WITH_LM_HEAD_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_WITH_LM_HEAD_MAPPING.keys())
            )
        )


class AutoModelForCausalLM:
    r"""
    :class:`~transformers.AutoModelForCausalLM` is a generic model class
    that will be instantiated as one of the language modeling model classes of the library
    when created with the `AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForCausalLM is designed to be instantiated "
            "using the `AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForCausalLM.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the causal language modeling model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForCausalLM.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `bert` configuration class: :class:`~transformers.BertLMHeadModel` (Bert model)
                - isInstance of `openai-gpt` configuration class: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
                - isInstance of `gpt2` configuration class: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
                - isInstance of `ctrl` configuration class: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
                - isInstance of `transfo-xl` configuration class: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
                - isInstance of `reformer` configuration class: :class:`~transformers.ReformerModelWithLMHead` (Reformer model)

        Examples::

            config = GPT2Config.from_pretrained('gpt2')    # Download configuration from S3 and cache.
            model = AutoModelForCausalLM.from_config(config)  # Instantiate a model with random (untrained) weights from the configuration.
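
            # A minimal sketch: `from_config` only builds the architecture, so the
            # weights are randomly initialized; use `from_pretrained` to load weights.
            model = AutoModelForCausalLM.from_config(GPT2Config())  # random weights
            model = AutoModelForCausalLM.from_pretrained('gpt2')    # pretrained weights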
        """
        for config_class, model_class in MODEL_FOR_CAUSAL_LM_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the causal language modeling model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `bert`: :class:`~transformers.BertLMHeadModel` (Bert model)
            - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
            - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
            - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
            - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
            - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
            - `reformer`: :class:`~transformers.ReformerModelWithLMHead` (Google Reformer model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path:
                Either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint into a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method
            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.
            resume_download: (`optional`) boolean, default False:
                Do not delete an incompletely received file. Attempt to resume the download if such a file exists.
            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.
            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForCausalLM.from_pretrained('gpt2')    # Download model and configuration from S3 and cache.
            model = AutoModelForCausalLM.from_pretrained('./test/gpt2_model/')  # E.g. model was saved using `save_pretrained('./test/gpt2_model/')`
            model = AutoModelForCausalLM.from_pretrained('gpt2', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = GPT2Config.from_json_file('./tf_model/gpt2_tf_model_config.json')
            model = AutoModelForCausalLM.from_pretrained('./tf_model/gpt2_tf_checkpoint.ckpt.index', from_tf=True, config=config)
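
            # A hedged generation sketch (assumes the matching AutoTokenizer):
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained('gpt2')
            input_ids = tokenizer.encode("Hello, my dog is", return_tensors='pt')
            generated = model.generate(input_ids, max_length=20)
            print(tokenizer.decode(generated[0]))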

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_CAUSAL_LM_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())
            )
        )


class AutoModelForMaskedLM:
    r"""
    :class:`~transformers.AutoModelForMaskedLM` is a generic model class
    that will be instantiated as one of the masked language modeling model classes of the library
    when created with the `AutoModelForMaskedLM.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForMaskedLM is designed to be instantiated "
            "using the `AutoModelForMaskedLM.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForMaskedLM.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the masked language modeling model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForMaskedLM.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
                - isInstance of `longformer` configuration class: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
                - isInstance of `roberta` configuration class: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertForMaskedLM` (Bert model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
                - isInstance of `xlm-roberta` configuration class: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-Roberta model)
                - isInstance of `electra` configuration class: :class:`~transformers.ElectraForMaskedLM` (Electra model)
                - isInstance of `camembert` configuration class: :class:`~transformers.CamembertForMaskedLM` (Camembert model)
                - isInstance of `albert` configuration class: :class:`~transformers.AlbertForMaskedLM` (Albert model)


        Examples::

            config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            model = AutoModelForMaskedLM.from_config(config)  # Instantiate a model with random (untrained) weights from the configuration.
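
            # A small sketch: config attributes may be overridden via kwargs before
            # building the randomly initialized model (hidden_dropout_prob is a real
            # BertConfig attribute).
            config = BertConfig.from_pretrained('bert-base-uncased', hidden_dropout_prob=0.2)
            model = AutoModelForMaskedLM.from_config(config)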
        """
        for config_class, model_class in MODEL_FOR_MASKED_LM_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_MASKED_LM_MAPPING.keys())
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the masked language modeling model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
            - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
            - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
            - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
            - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
            - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
            - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
            - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
            - `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
            - `bert`: :class:`~transformers.BertForMaskedLM` (Bert model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path:
                Either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint into a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method
            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.
            resume_download: (`optional`) boolean, default False:
                Do not delete an incompletely received file. Attempt to resume the download if such a file exists.
            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.
            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModelForMaskedLM.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/bert_model/')`
            model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = BertConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModelForMaskedLM.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
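
            # A hedged mask-filling sketch (assumes the matching AutoTokenizer):
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
            input_ids = tokenizer.encode("Paris is the %s of France." % tokenizer.mask_token, return_tensors='pt')
            logits = model(input_ids)[0]  # first output holds the prediction scores
            mask_index = (input_ids == tokenizer.mask_token_id).nonzero()[0, 1]
            print(tokenizer.decode([logits[0, mask_index].argmax().item()]))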

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_MASKED_LM_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_MASKED_LM_MAPPING.keys())
            )
        )


class AutoModelForSeq2SeqLM:
    r"""
    :class:`~transformers.AutoModelForSeq2SeqLM` is a generic model class
    that will be instantiated as one of the sequence-to-sequence language modeling model classes of the library
    when created with the `AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForSeq2SeqLM is designed to be instantiated "
            "using the `AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForSeq2SeqLM.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the sequence-to-sequence language modeling model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForSeq2SeqLM.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `t5` configuration class: :class:`~transformers.T5ForConditionalGeneration` (T5 model)
                - isInstance of `bart` configuration class: :class:`~transformers.BartForConditionalGeneration` (Bart model)
                - isInstance of `marian` configuration class: :class:`~transformers.MarianMTModel` (Marian model)
                - isInstance of `encoder-decoder` configuration class: :class:`~transformers.EncoderDecoderModel` (Encoder Decoder model)

        Examples::

            config = T5Config.from_pretrained('t5-base')    # Download configuration from S3 and cache.
            model = AutoModelForSeq2SeqLM.from_config(config)  # Instantiate a model with random (untrained) weights from the configuration.
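
            # A hedged sketch: an encoder-decoder configuration built from two BERT
            # configs also resolves to a model in this mapping (EncoderDecoderModel).
            encoder_config = BertConfig.from_pretrained('bert-base-uncased')
            decoder_config = BertConfig.from_pretrained('bert-base-uncased')
            config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder_config, decoder_config)
            model = AutoModelForSeq2SeqLM.from_config(config)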
        """
        for config_class, model_class in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys()),
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the sequence-to-sequence language modeling model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `t5`: :class:`~transformers.T5ForConditionalGeneration` (T5 model)
            - `bart`: :class:`~transformers.BartForConditionalGeneration` (Bart model)
            - `marian`: :class:`~transformers.MarianMTModel` (Marian model)
            - `encoder-decoder`: :class:`~transformers.EncoderDecoderModel` (Encoder Decoder model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path:
                Either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint into a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method
            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.
            resume_download: (`optional`) boolean, default False:
                Do not delete an incompletely received file. Attempt to resume the download if such a file exists.
            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.
            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')    # Download model and configuration from S3 and cache.
            model = AutoModelForSeq2SeqLM.from_pretrained('./test/t5_model/')  # E.g. model was saved using `save_pretrained('./test/t5_model/')`
            model = AutoModelForSeq2SeqLM.from_pretrained('t5-base', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = T5Config.from_json_file('./tf_model/t5_tf_model_config.json')
            model = AutoModelForSeq2SeqLM.from_pretrained('./tf_model/t5_tf_checkpoint.ckpt.index', from_tf=True, config=config)
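
            # A hedged generation sketch (assumes the matching AutoTokenizer; T5
            # expects a task prefix such as "summarize: "):
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained('t5-base')
            input_ids = tokenizer.encode("summarize: The tower is 324 metres tall.", return_tensors='pt')
            summary_ids = model.generate(input_ids, max_length=40, num_beams=4)
            print(tokenizer.decode(summary_ids[0]))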

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys()),
            )
        )


class AutoModelForSequenceClassification:
    r"""
    :class:`~transformers.AutoModelForSequenceClassification` is a generic model class
    that will be instantiated as one of the sequence classification model classes of the library
    when created with the `AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForSequenceClassification is designed to be instantiated "
            "using the `AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForSequenceClassification.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the sequence classification model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForSequenceClassification.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model)
                - isInstance of `albert` configuration class: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model)
                - isInstance of `camembert` configuration class: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model)
                - isInstance of `xlm roberta` configuration class: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model)
                - isInstance of `roberta` configuration class: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertForSequenceClassification` (Bert model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMForSequenceClassification` (XLM model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)

        Examples::

            config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            model = AutoModelForSequenceClassification.from_config(config)  # Instantiate a model with random (untrained) weights from the configuration.
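
            # A small sketch (num_labels is a real config attribute): size the
            # classification head before building the randomly initialized model.
            config = BertConfig.from_pretrained('bert-base-uncased', num_labels=3)
            model = AutoModelForSequenceClassification.from_config(config)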
        """
        for config_class, model_class in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys()),
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the sequence classification model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model)
            - `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model)
            - `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model)
            - `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model)
            - `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model)
            - `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model)
            - `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
            - `xlm`: :class:`~transformers.XLMForSequenceClassification` (XLM model)
            - `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path: either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint into a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.

            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method

            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.

            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.

            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.

            resume_download: (`optional`) boolean, default False:
                Do not delete an incompletely received file. Attempt to resume the download if such a file exists.

            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.

            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.

            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModelForSequenceClassification.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/bert_model/')`
            model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = BertConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModelForSequenceClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
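
            # A hedged inference sketch (assumes the matching AutoTokenizer):
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
            input_ids = tokenizer.encode("This movie was great!", return_tensors='pt')
            logits = model(input_ids)[0]  # first output holds the classification logits
            predicted_class = logits.argmax(dim=-1).item()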

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys()),
            )
        )


class AutoModelForQuestionAnswering:
    r"""
    :class:`~transformers.AutoModelForQuestionAnswering` is a generic model class
    that will be instantiated as one of the question answering model classes of the library
    when created with the `AutoModelForQuestionAnswering.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForQuestionAnswering is designed to be instantiated "
            "using the `AutoModelForQuestionAnswering.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForQuestionAnswering.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the question answering model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForQuestionAnswering.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model)
                - isInstance of `albert` configuration class: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertForQuestionAnswering` (Bert model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertForQuestionAnswering` (Flaubert model)

        Examples::

            config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            model = AutoModelForQuestionAnswering.from_config(config)  # Instantiate a model with random (untrained) weights from the configuration.
        """
        for config_class, model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()),
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the question answering model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model)
            - `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model)
            - `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model)
            - `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
            - `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
            - `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (Flaubert model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path: either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint into a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.

            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method

            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                an optional state dictionary for the model to use instead of a state dictionary loaded from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.

            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.

            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.

            resume_download: (`optional`) boolean, default False:
                Do not delete an incompletely received file. Attempt to resume the download if such a file exists.

            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.

            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.

            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModelForQuestionAnswering.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/bert_model/')`
            model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = BertConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
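
            # A hedged extractive-QA sketch (assumes the matching AutoTokenizer):
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
            inputs = tokenizer.encode_plus("Who wrote it?", "It was written by Jane.", return_tensors='pt')
            start_logits, end_logits = model(**inputs)[:2]
            start, end = start_logits.argmax(), end_logits.argmax() + 1
            answer = tokenizer.decode(inputs['input_ids'][0][start:end])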

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()),
            )
        )


class AutoModelForTokenClassification:
    r"""
    :class:`~transformers.AutoModelForTokenClassification` is a generic model class
    that will be instantiated as one of the token classification model classes of the library
    when created with the `AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForTokenClassification is designed to be instantiated "
            "using the `AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForTokenClassification.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
        r"""Instantiates one of the token classification model classes of the library
        from a configuration.

        Note:
            Loading a model from its configuration file does **not** load the model weights.
            It only affects the model's configuration. Use :func:`~transformers.AutoModelForTokenClassification.from_pretrained` to load
            the model weights.

        Args:
            config (:class:`~transformers.PretrainedConfig`):
                The model class to instantiate is selected based on the configuration class:

                - isInstance of `distilbert` configuration class: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model)
                - isInstance of `xlm` configuration class: :class:`~transformers.XLMForTokenClassification` (XLM model)
                - isInstance of `xlm roberta` configuration class: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model)
                - isInstance of `bert` configuration class: :class:`~transformers.BertForTokenClassification` (Bert model)
                - isInstance of `albert` configuration class: :class:`~transformers.AlbertForTokenClassification` (ALBERT model)
                - isInstance of `xlnet` configuration class: :class:`~transformers.XLNetForTokenClassification` (XLNet model)
                - isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertForTokenClassification` (Flaubert model)
                - isInstance of `camembert` configuration class: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
                - isInstance of `roberta` configuration class: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
                - isInstance of `electra` configuration class: :class:`~transformers.ElectraForTokenClassification` (Electra model)

        Examples::

            config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
            model = AutoModelForTokenClassification.from_config(config)  # Instantiate a model with random (untrained) weights from the configuration.
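
            # A hedged sketch (id2label/label2id are real config attributes; the tag
            # set below is purely illustrative):
            config = BertConfig.from_pretrained(
                'bert-base-uncased',
                num_labels=3,
                id2label={0: 'O', 1: 'B-ENT', 2: 'I-ENT'},
                label2id={'O': 0, 'B-ENT': 1, 'I-ENT': 2},
            )
            model = AutoModelForTokenClassification.from_config(config)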
        """
        for config_class, model_class in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()),
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiates one of the token classification model classes of the library
        from a pre-trained model configuration.

        The `from_pretrained()` method takes care of returning the correct model class instance
        based on the `model_type` property of the config object, or when it's missing,
        falling back to using pattern matching on the `pretrained_model_name_or_path` string:

            - `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model)
            - `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model)
            - `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model)
            - `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
            - `bert`: :class:`~transformers.BertForTokenClassification` (Bert model)
            - `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model)
            - `flaubert`: :class:`~transformers.FlaubertForTokenClassification` (Flaubert model)
            - `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
            - `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Args:
            pretrained_model_name_or_path:
                Either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as the ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint to a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.

            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method

            config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            state_dict: (`optional`) dict:
                An optional state dictionary for the model to use instead of a state dictionary loaded from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.

            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.

            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.

            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.

            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.

            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                These arguments will be passed to the configuration and the model.

        Examples::

            model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = AutoModelForTokenClassification.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/bert_model/')`
            model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModelForTokenClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
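            # A minimal sketch of switching back to training mode before fine-tuning
            # (the model is loaded in eval mode by default, see above):
            model.train()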

        """
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

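        # Same dispatch as in `from_config`, but delegate to the matching model
        # class's own `from_pretrained` so it handles the actual weight loading.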
        for config_class, model_class in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()),
            )
        )


class AutoModelForMultipleChoice:
    r"""
    :class:`~transformers.AutoModelForMultipleChoice` is a generic model class
    that will be instantiated as one of the multiple choice model classes of the library
    when created with the `AutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)`
    class method.

    This class cannot be instantiated using `__init__()` (throws an error).
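
        Examples::

            # A minimal sketch; `bert-base-uncased` is used purely as an illustrative
            # checkpoint whose configuration maps to a multiple choice model class.
            model = AutoModelForMultipleChoice.from_pretrained('bert-base-uncased')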
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoModelForMultipleChoice is designed to be instantiated "
            "using the `AutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)` or "
            "`AutoModelForMultipleChoice.from_config(config)` methods."
        )

    @classmethod
    def from_config(cls, config):
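        r"""Instantiates one of the multiple choice model classes of the library
        from a configuration. The dispatch mirrors
        `AutoModelForTokenClassification.from_config` above; only the architecture
        is built, with randomly initialized weights (no pre-trained weights loaded).
        """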
        for config_class, model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items():
            if isinstance(config, config_class):
                return model_class(config)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()),
            )
        )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
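        r"""Instantiates one of the multiple choice model classes of the library
        from a pre-trained model configuration. The dispatch and arguments mirror
        `AutoModelForTokenClassification.from_pretrained` documented above.
        """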
        config = kwargs.pop("config", None)
        if not isinstance(config, PretrainedConfig):
            config, kwargs = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
            )

        for config_class, model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items():
            if isinstance(config, config_class):
                return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)

        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__,
                cls.__name__,
                ", ".join(c.__name__ for c in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()),
            )
        )