# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Auto Config class."""

import importlib
import os
import re
import warnings
from collections import OrderedDict
from typing import List, Union

from ...configuration_utils import PretrainedConfig
from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
from ...utils import CONFIG_NAME, logging


logger = logging.get_logger(__name__)


# Maps each model type string to the *name* of its configuration class.
# Values are plain strings rather than class objects; several model types
# deliberately share a configuration class (for example `code_llama` reuses
# `LlamaConfig`, `gpt-sw3` reuses `GPT2Config`, and `nougat` reuses
# `VisionEncoderDecoderConfig`).
CONFIG_MAPPING_NAMES = OrderedDict(
    [
        # Add configs here
        ("albert", "AlbertConfig"),
        ("align", "AlignConfig"),
        ("altclip", "AltCLIPConfig"),
        ("audio-spectrogram-transformer", "ASTConfig"),
        ("autoformer", "AutoformerConfig"),
        ("bark", "BarkConfig"),
        ("bart", "BartConfig"),
        ("beit", "BeitConfig"),
        ("bert", "BertConfig"),
        ("bert-generation", "BertGenerationConfig"),
        ("big_bird", "BigBirdConfig"),
        ("bigbird_pegasus", "BigBirdPegasusConfig"),
        ("biogpt", "BioGptConfig"),
        ("bit", "BitConfig"),
        ("blenderbot", "BlenderbotConfig"),
        ("blenderbot-small", "BlenderbotSmallConfig"),
        ("blip", "BlipConfig"),
        ("blip-2", "Blip2Config"),
        ("bloom", "BloomConfig"),
        ("bridgetower", "BridgeTowerConfig"),
        ("bros", "BrosConfig"),
        ("camembert", "CamembertConfig"),
        ("canine", "CanineConfig"),
        ("chinese_clip", "ChineseCLIPConfig"),
        ("chinese_clip_vision_model", "ChineseCLIPVisionConfig"),
        ("clap", "ClapConfig"),
        ("clip", "CLIPConfig"),
        ("clip_vision_model", "CLIPVisionConfig"),
        ("clipseg", "CLIPSegConfig"),
        ("clvp", "ClvpConfig"),
        ("code_llama", "LlamaConfig"),
        ("codegen", "CodeGenConfig"),
        ("cohere", "CohereConfig"),
        ("conditional_detr", "ConditionalDetrConfig"),
        ("convbert", "ConvBertConfig"),
        ("convnext", "ConvNextConfig"),
        ("convnextv2", "ConvNextV2Config"),
        ("cpmant", "CpmAntConfig"),
        ("ctrl", "CTRLConfig"),
        ("cvt", "CvtConfig"),
        ("data2vec-audio", "Data2VecAudioConfig"),
        ("data2vec-text", "Data2VecTextConfig"),
        ("data2vec-vision", "Data2VecVisionConfig"),
        ("dbrx", "DbrxConfig"),
        ("deberta", "DebertaConfig"),
        ("deberta-v2", "DebertaV2Config"),
        ("decision_transformer", "DecisionTransformerConfig"),
        ("deformable_detr", "DeformableDetrConfig"),
        ("deit", "DeiTConfig"),
        ("depth_anything", "DepthAnythingConfig"),
        ("deta", "DetaConfig"),
        ("detr", "DetrConfig"),
        ("dinat", "DinatConfig"),
        ("dinov2", "Dinov2Config"),
        ("distilbert", "DistilBertConfig"),
        ("donut-swin", "DonutSwinConfig"),
        ("dpr", "DPRConfig"),
        ("dpt", "DPTConfig"),
        ("efficientformer", "EfficientFormerConfig"),
        ("efficientnet", "EfficientNetConfig"),
        ("electra", "ElectraConfig"),
        ("encodec", "EncodecConfig"),
        ("encoder-decoder", "EncoderDecoderConfig"),
        ("ernie", "ErnieConfig"),
        ("ernie_m", "ErnieMConfig"),
        ("esm", "EsmConfig"),
        ("falcon", "FalconConfig"),
        ("fastspeech2_conformer", "FastSpeech2ConformerConfig"),
        ("flaubert", "FlaubertConfig"),
        ("flava", "FlavaConfig"),
        ("fnet", "FNetConfig"),
        ("focalnet", "FocalNetConfig"),
        ("fsmt", "FSMTConfig"),
        ("funnel", "FunnelConfig"),
        ("fuyu", "FuyuConfig"),
        ("gemma", "GemmaConfig"),
        ("gemma2", "Gemma2Config"),
        ("git", "GitConfig"),
        ("glpn", "GLPNConfig"),
        ("gpt-sw3", "GPT2Config"),
        ("gpt2", "GPT2Config"),
        ("gpt_bigcode", "GPTBigCodeConfig"),
        ("gpt_neo", "GPTNeoConfig"),
        ("gpt_neox", "GPTNeoXConfig"),
        ("gpt_neox_japanese", "GPTNeoXJapaneseConfig"),
        ("gptj", "GPTJConfig"),
        ("gptsan-japanese", "GPTSanJapaneseConfig"),
        ("graphormer", "GraphormerConfig"),
        ("grounding-dino", "GroundingDinoConfig"),
        ("groupvit", "GroupViTConfig"),
        ("hubert", "HubertConfig"),
        ("ibert", "IBertConfig"),
        ("idefics", "IdeficsConfig"),
        ("idefics2", "Idefics2Config"),
        ("imagegpt", "ImageGPTConfig"),
        ("informer", "InformerConfig"),
        ("instructblip", "InstructBlipConfig"),
        ("instructblipvideo", "InstructBlipVideoConfig"),
        ("jamba", "JambaConfig"),
        ("jetmoe", "JetMoeConfig"),
        ("jukebox", "JukeboxConfig"),
        ("kosmos-2", "Kosmos2Config"),
        ("layoutlm", "LayoutLMConfig"),
        ("layoutlmv2", "LayoutLMv2Config"),
        ("layoutlmv3", "LayoutLMv3Config"),
        ("led", "LEDConfig"),
        ("levit", "LevitConfig"),
        ("lilt", "LiltConfig"),
        ("llama", "LlamaConfig"),
        ("llava", "LlavaConfig"),
        ("llava-next-video", "LlavaNextVideoConfig"),
        ("llava_next", "LlavaNextConfig"),
        ("longformer", "LongformerConfig"),
        ("longt5", "LongT5Config"),
        ("luke", "LukeConfig"),
        ("lxmert", "LxmertConfig"),
        ("m2m_100", "M2M100Config"),
        ("mamba", "MambaConfig"),
        ("marian", "MarianConfig"),
        ("markuplm", "MarkupLMConfig"),
        ("mask2former", "Mask2FormerConfig"),
        ("maskformer", "MaskFormerConfig"),
        ("maskformer-swin", "MaskFormerSwinConfig"),
        ("mbart", "MBartConfig"),
        ("mctct", "MCTCTConfig"),
        ("mega", "MegaConfig"),
        ("megatron-bert", "MegatronBertConfig"),
        ("mgp-str", "MgpstrConfig"),
        ("mistral", "MistralConfig"),
        ("mixtral", "MixtralConfig"),
        ("mobilebert", "MobileBertConfig"),
        ("mobilenet_v1", "MobileNetV1Config"),
        ("mobilenet_v2", "MobileNetV2Config"),
        ("mobilevit", "MobileViTConfig"),
        ("mobilevitv2", "MobileViTV2Config"),
        ("mpnet", "MPNetConfig"),
        ("mpt", "MptConfig"),
        ("mra", "MraConfig"),
        ("mt5", "MT5Config"),
        ("musicgen", "MusicgenConfig"),
        ("musicgen_melody", "MusicgenMelodyConfig"),
        ("mvp", "MvpConfig"),
        ("nat", "NatConfig"),
        ("nezha", "NezhaConfig"),
        ("nllb-moe", "NllbMoeConfig"),
        ("nougat", "VisionEncoderDecoderConfig"),
        ("nystromformer", "NystromformerConfig"),
        ("olmo", "OlmoConfig"),
        ("oneformer", "OneFormerConfig"),
        ("open-llama", "OpenLlamaConfig"),
        ("openai-gpt", "OpenAIGPTConfig"),
        ("opt", "OPTConfig"),
        ("owlv2", "Owlv2Config"),
        ("owlvit", "OwlViTConfig"),
        ("paligemma", "PaliGemmaConfig"),
        ("patchtsmixer", "PatchTSMixerConfig"),
        ("patchtst", "PatchTSTConfig"),
        ("pegasus", "PegasusConfig"),
        ("pegasus_x", "PegasusXConfig"),
        ("perceiver", "PerceiverConfig"),
        ("persimmon", "PersimmonConfig"),
        ("phi", "PhiConfig"),
        ("phi3", "Phi3Config"),
        ("pix2struct", "Pix2StructConfig"),
        ("plbart", "PLBartConfig"),
        ("poolformer", "PoolFormerConfig"),
        ("pop2piano", "Pop2PianoConfig"),
        ("prophetnet", "ProphetNetConfig"),
        ("pvt", "PvtConfig"),
        ("pvt_v2", "PvtV2Config"),
        ("qdqbert", "QDQBertConfig"),
        ("qwen2", "Qwen2Config"),
        ("qwen2_moe", "Qwen2MoeConfig"),
        ("rag", "RagConfig"),
        ("realm", "RealmConfig"),
        ("recurrent_gemma", "RecurrentGemmaConfig"),
        ("reformer", "ReformerConfig"),
        ("regnet", "RegNetConfig"),
        ("rembert", "RemBertConfig"),
        ("resnet", "ResNetConfig"),
        ("retribert", "RetriBertConfig"),
        ("roberta", "RobertaConfig"),
        ("roberta-prelayernorm", "RobertaPreLayerNormConfig"),
        ("roc_bert", "RoCBertConfig"),
        ("roformer", "RoFormerConfig"),
        ("rt_detr", "RTDetrConfig"),
        ("rt_detr_resnet", "RTDetrResNetConfig"),
        ("rwkv", "RwkvConfig"),
        ("sam", "SamConfig"),
        ("seamless_m4t", "SeamlessM4TConfig"),
        ("seamless_m4t_v2", "SeamlessM4Tv2Config"),
        ("segformer", "SegformerConfig"),
        ("seggpt", "SegGptConfig"),
        ("sew", "SEWConfig"),
        ("sew-d", "SEWDConfig"),
        ("siglip", "SiglipConfig"),
        ("siglip_vision_model", "SiglipVisionConfig"),
        ("speech-encoder-decoder", "SpeechEncoderDecoderConfig"),
        ("speech_to_text", "Speech2TextConfig"),
        ("speech_to_text_2", "Speech2Text2Config"),
        ("speecht5", "SpeechT5Config"),
        ("splinter", "SplinterConfig"),
        ("squeezebert", "SqueezeBertConfig"),
        ("stablelm", "StableLmConfig"),
        ("starcoder2", "Starcoder2Config"),
        ("superpoint", "SuperPointConfig"),
        ("swiftformer", "SwiftFormerConfig"),
        ("swin", "SwinConfig"),
        ("swin2sr", "Swin2SRConfig"),
        ("swinv2", "Swinv2Config"),
        ("switch_transformers", "SwitchTransformersConfig"),
        ("t5", "T5Config"),
        ("table-transformer", "TableTransformerConfig"),
        ("tapas", "TapasConfig"),
        ("time_series_transformer", "TimeSeriesTransformerConfig"),
        ("timesformer", "TimesformerConfig"),
        ("timm_backbone", "TimmBackboneConfig"),
        ("trajectory_transformer", "TrajectoryTransformerConfig"),
        ("transfo-xl", "TransfoXLConfig"),
        ("trocr", "TrOCRConfig"),
        ("tvlt", "TvltConfig"),
        ("tvp", "TvpConfig"),
        ("udop", "UdopConfig"),
        ("umt5", "UMT5Config"),
        ("unispeech", "UniSpeechConfig"),
        ("unispeech-sat", "UniSpeechSatConfig"),
        ("univnet", "UnivNetConfig"),
        ("upernet", "UperNetConfig"),
        ("van", "VanConfig"),
        ("video_llava", "VideoLlavaConfig"),
        ("videomae", "VideoMAEConfig"),
        ("vilt", "ViltConfig"),
        ("vipllava", "VipLlavaConfig"),
        ("vision-encoder-decoder", "VisionEncoderDecoderConfig"),
        ("vision-text-dual-encoder", "VisionTextDualEncoderConfig"),
        ("visual_bert", "VisualBertConfig"),
        ("vit", "ViTConfig"),
        ("vit_hybrid", "ViTHybridConfig"),
        ("vit_mae", "ViTMAEConfig"),
        ("vit_msn", "ViTMSNConfig"),
        ("vitdet", "VitDetConfig"),
        ("vitmatte", "VitMatteConfig"),
        ("vits", "VitsConfig"),
        ("vivit", "VivitConfig"),
        ("wav2vec2", "Wav2Vec2Config"),
        ("wav2vec2-bert", "Wav2Vec2BertConfig"),
        ("wav2vec2-conformer", "Wav2Vec2ConformerConfig"),
        ("wavlm", "WavLMConfig"),
        ("whisper", "WhisperConfig"),
        ("xclip", "XCLIPConfig"),
        ("xglm", "XGLMConfig"),
        ("xlm", "XLMConfig"),
        ("xlm-prophetnet", "XLMProphetNetConfig"),
        ("xlm-roberta", "XLMRobertaConfig"),
        ("xlm-roberta-xl", "XLMRobertaXLConfig"),
        ("xlnet", "XLNetConfig"),
        ("xmod", "XmodConfig"),
        ("yolos", "YolosConfig"),
        ("yoso", "YosoConfig"),
    ]
)


# Maps each model type string to its full, properly cased display name.
# This table holds more keys than `CONFIG_MAPPING_NAMES`: some entries
# (for example `barthez`, `flan-t5`, `llama2`) have a display name here but
# no configuration class in that mapping.
MODEL_NAMES_MAPPING = OrderedDict(
    [
        # Add full (and cased) model names here
        ("albert", "ALBERT"),
        ("align", "ALIGN"),
        ("altclip", "AltCLIP"),
        ("audio-spectrogram-transformer", "Audio Spectrogram Transformer"),
        ("autoformer", "Autoformer"),
        ("bark", "Bark"),
        ("bart", "BART"),
        ("barthez", "BARThez"),
        ("bartpho", "BARTpho"),
        ("beit", "BEiT"),
        ("bert", "BERT"),
        ("bert-generation", "Bert Generation"),
        ("bert-japanese", "BertJapanese"),
        ("bertweet", "BERTweet"),
        ("big_bird", "BigBird"),
        ("bigbird_pegasus", "BigBird-Pegasus"),
        ("biogpt", "BioGpt"),
        ("bit", "BiT"),
        ("blenderbot", "Blenderbot"),
        ("blenderbot-small", "BlenderbotSmall"),
        ("blip", "BLIP"),
        ("blip-2", "BLIP-2"),
        ("bloom", "BLOOM"),
        ("bort", "BORT"),
        ("bridgetower", "BridgeTower"),
        ("bros", "BROS"),
        ("byt5", "ByT5"),
        ("camembert", "CamemBERT"),
        ("canine", "CANINE"),
        ("chinese_clip", "Chinese-CLIP"),
        ("chinese_clip_vision_model", "ChineseCLIPVisionModel"),
        ("clap", "CLAP"),
        ("clip", "CLIP"),
        ("clip_vision_model", "CLIPVisionModel"),
        ("clipseg", "CLIPSeg"),
        ("clvp", "CLVP"),
        ("code_llama", "CodeLlama"),
        ("codegen", "CodeGen"),
        ("cohere", "Cohere"),
        ("conditional_detr", "Conditional DETR"),
        ("convbert", "ConvBERT"),
        ("convnext", "ConvNeXT"),
        ("convnextv2", "ConvNeXTV2"),
        ("cpm", "CPM"),
        ("cpmant", "CPM-Ant"),
        ("ctrl", "CTRL"),
        ("cvt", "CvT"),
        ("data2vec-audio", "Data2VecAudio"),
        ("data2vec-text", "Data2VecText"),
        ("data2vec-vision", "Data2VecVision"),
        ("dbrx", "DBRX"),
        ("deberta", "DeBERTa"),
        ("deberta-v2", "DeBERTa-v2"),
        ("decision_transformer", "Decision Transformer"),
        ("deformable_detr", "Deformable DETR"),
        ("deit", "DeiT"),
        ("deplot", "DePlot"),
        ("depth_anything", "Depth Anything"),
        ("deta", "DETA"),
        ("detr", "DETR"),
        ("dialogpt", "DialoGPT"),
        ("dinat", "DiNAT"),
        ("dinov2", "DINOv2"),
        ("distilbert", "DistilBERT"),
        ("dit", "DiT"),
        ("donut-swin", "DonutSwin"),
        ("dpr", "DPR"),
        ("dpt", "DPT"),
        ("efficientformer", "EfficientFormer"),
        ("efficientnet", "EfficientNet"),
        ("electra", "ELECTRA"),
        ("encodec", "EnCodec"),
        ("encoder-decoder", "Encoder decoder"),
        ("ernie", "ERNIE"),
        ("ernie_m", "ErnieM"),
        ("esm", "ESM"),
        ("falcon", "Falcon"),
        ("fastspeech2_conformer", "FastSpeech2Conformer"),
        ("flan-t5", "FLAN-T5"),
        ("flan-ul2", "FLAN-UL2"),
        ("flaubert", "FlauBERT"),
        ("flava", "FLAVA"),
        ("fnet", "FNet"),
        ("focalnet", "FocalNet"),
        ("fsmt", "FairSeq Machine-Translation"),
        ("funnel", "Funnel Transformer"),
        ("fuyu", "Fuyu"),
        ("gemma", "Gemma"),
        ("gemma2", "Gemma2"),
        ("git", "GIT"),
        ("glpn", "GLPN"),
        ("gpt-sw3", "GPT-Sw3"),
        ("gpt2", "OpenAI GPT-2"),
        ("gpt_bigcode", "GPTBigCode"),
        ("gpt_neo", "GPT Neo"),
        ("gpt_neox", "GPT NeoX"),
        ("gpt_neox_japanese", "GPT NeoX Japanese"),
        ("gptj", "GPT-J"),
        ("gptsan-japanese", "GPTSAN-japanese"),
        ("graphormer", "Graphormer"),
        ("grounding-dino", "Grounding DINO"),
        ("groupvit", "GroupViT"),
        ("herbert", "HerBERT"),
        ("hubert", "Hubert"),
        ("ibert", "I-BERT"),
        ("idefics", "IDEFICS"),
        ("idefics2", "Idefics2"),
        ("imagegpt", "ImageGPT"),
        ("informer", "Informer"),
        ("instructblip", "InstructBLIP"),
        ("instructblipvideo", "InstructBlipVideo"),
        ("jamba", "Jamba"),
        ("jetmoe", "JetMoe"),
        ("jukebox", "Jukebox"),
        ("kosmos-2", "KOSMOS-2"),
        ("layoutlm", "LayoutLM"),
        ("layoutlmv2", "LayoutLMv2"),
        ("layoutlmv3", "LayoutLMv3"),
        ("layoutxlm", "LayoutXLM"),
        ("led", "LED"),
        ("levit", "LeViT"),
        ("lilt", "LiLT"),
        ("llama", "LLaMA"),
        ("llama2", "Llama2"),
        ("llama3", "Llama3"),
        ("llava", "LLaVa"),
        ("llava-next-video", "LLaVa-NeXT-Video"),
        ("llava_next", "LLaVA-NeXT"),
        ("longformer", "Longformer"),
        ("longt5", "LongT5"),
        ("luke", "LUKE"),
        ("lxmert", "LXMERT"),
        ("m2m_100", "M2M100"),
        ("madlad-400", "MADLAD-400"),
        ("mamba", "Mamba"),
        ("marian", "Marian"),
        ("markuplm", "MarkupLM"),
        ("mask2former", "Mask2Former"),
        ("maskformer", "MaskFormer"),
        ("maskformer-swin", "MaskFormerSwin"),
        ("matcha", "MatCha"),
        ("mbart", "mBART"),
        ("mbart50", "mBART-50"),
        ("mctct", "M-CTC-T"),
        ("mega", "MEGA"),
        ("megatron-bert", "Megatron-BERT"),
        ("megatron_gpt2", "Megatron-GPT2"),
        ("mgp-str", "MGP-STR"),
        ("mistral", "Mistral"),
        ("mixtral", "Mixtral"),
        ("mluke", "mLUKE"),
        ("mms", "MMS"),
        ("mobilebert", "MobileBERT"),
        ("mobilenet_v1", "MobileNetV1"),
        ("mobilenet_v2", "MobileNetV2"),
        ("mobilevit", "MobileViT"),
        ("mobilevitv2", "MobileViTV2"),
        ("mpnet", "MPNet"),
        ("mpt", "MPT"),
        ("mra", "MRA"),
        ("mt5", "MT5"),
        ("musicgen", "MusicGen"),
        ("musicgen_melody", "MusicGen Melody"),
        ("mvp", "MVP"),
        ("nat", "NAT"),
        ("nezha", "Nezha"),
        ("nllb", "NLLB"),
        ("nllb-moe", "NLLB-MOE"),
        ("nougat", "Nougat"),
        ("nystromformer", "Nyströmformer"),
        ("olmo", "OLMo"),
        ("oneformer", "OneFormer"),
        ("open-llama", "OpenLlama"),
        ("openai-gpt", "OpenAI GPT"),
        ("opt", "OPT"),
        ("owlv2", "OWLv2"),
        ("owlvit", "OWL-ViT"),
        ("paligemma", "PaliGemma"),
        ("patchtsmixer", "PatchTSMixer"),
        ("patchtst", "PatchTST"),
        ("pegasus", "Pegasus"),
        ("pegasus_x", "PEGASUS-X"),
        ("perceiver", "Perceiver"),
        ("persimmon", "Persimmon"),
        ("phi", "Phi"),
        ("phi3", "Phi3"),
        ("phobert", "PhoBERT"),
        ("pix2struct", "Pix2Struct"),
        ("plbart", "PLBart"),
        ("poolformer", "PoolFormer"),
        ("pop2piano", "Pop2Piano"),
        ("prophetnet", "ProphetNet"),
        ("pvt", "PVT"),
        ("pvt_v2", "PVTv2"),
        ("qdqbert", "QDQBert"),
        ("qwen2", "Qwen2"),
        ("qwen2_moe", "Qwen2MoE"),
        ("rag", "RAG"),
        ("realm", "REALM"),
        ("recurrent_gemma", "RecurrentGemma"),
        ("reformer", "Reformer"),
        ("regnet", "RegNet"),
        ("rembert", "RemBERT"),
        ("resnet", "ResNet"),
        ("retribert", "RetriBERT"),
        ("roberta", "RoBERTa"),
        ("roberta-prelayernorm", "RoBERTa-PreLayerNorm"),
        ("roc_bert", "RoCBert"),
        ("roformer", "RoFormer"),
        ("rt_detr", "RT-DETR"),
        ("rt_detr_resnet", "RT-DETR-ResNet"),
        ("rwkv", "RWKV"),
        ("sam", "SAM"),
        ("seamless_m4t", "SeamlessM4T"),
        ("seamless_m4t_v2", "SeamlessM4Tv2"),
        ("segformer", "SegFormer"),
        ("seggpt", "SegGPT"),
        ("sew", "SEW"),
        ("sew-d", "SEW-D"),
        ("siglip", "SigLIP"),
        ("siglip_vision_model", "SiglipVisionModel"),
        ("speech-encoder-decoder", "Speech Encoder decoder"),
        ("speech_to_text", "Speech2Text"),
        ("speech_to_text_2", "Speech2Text2"),
        ("speecht5", "SpeechT5"),
        ("splinter", "Splinter"),
        ("squeezebert", "SqueezeBERT"),
        ("stablelm", "StableLm"),
        ("starcoder2", "Starcoder2"),
        ("superpoint", "SuperPoint"),
        ("swiftformer", "SwiftFormer"),
        ("swin", "Swin Transformer"),
        ("swin2sr", "Swin2SR"),
        ("swinv2", "Swin Transformer V2"),
        ("switch_transformers", "SwitchTransformers"),
        ("t5", "T5"),
        ("t5v1.1", "T5v1.1"),
        ("table-transformer", "Table Transformer"),
        ("tapas", "TAPAS"),
        ("tapex", "TAPEX"),
        ("time_series_transformer", "Time Series Transformer"),
        ("timesformer", "TimeSformer"),
        ("timm_backbone", "TimmBackbone"),
        ("trajectory_transformer", "Trajectory Transformer"),
        ("transfo-xl", "Transformer-XL"),
        ("trocr", "TrOCR"),
        ("tvlt", "TVLT"),
        ("tvp", "TVP"),
        ("udop", "UDOP"),
        ("ul2", "UL2"),
        ("umt5", "UMT5"),
        ("unispeech", "UniSpeech"),
        ("unispeech-sat", "UniSpeechSat"),
        ("univnet", "UnivNet"),
        ("upernet", "UPerNet"),
        ("van", "VAN"),
        ("video_llava", "VideoLlava"),
        ("videomae", "VideoMAE"),
        ("vilt", "ViLT"),
        ("vipllava", "VipLlava"),
        ("vision-encoder-decoder", "Vision Encoder decoder"),
        ("vision-text-dual-encoder", "VisionTextDualEncoder"),
        ("visual_bert", "VisualBERT"),
        ("vit", "ViT"),
        ("vit_hybrid", "ViT Hybrid"),
        ("vit_mae", "ViTMAE"),
        ("vit_msn", "ViTMSN"),
        ("vitdet", "VitDet"),
        ("vitmatte", "ViTMatte"),
        ("vits", "VITS"),
        ("vivit", "ViViT"),
        ("wav2vec2", "Wav2Vec2"),
        ("wav2vec2-bert", "Wav2Vec2-BERT"),
        ("wav2vec2-conformer", "Wav2Vec2-Conformer"),
        ("wav2vec2_phoneme", "Wav2Vec2Phoneme"),
        ("wavlm", "WavLM"),
        ("whisper", "Whisper"),
        ("xclip", "X-CLIP"),
        ("xglm", "XGLM"),
        ("xlm", "XLM"),
        ("xlm-prophetnet", "XLM-ProphetNet"),
        ("xlm-roberta", "XLM-RoBERTa"),
        ("xlm-roberta-xl", "XLM-RoBERTa-XL"),
        ("xlm-v", "XLM-V"),
        ("xlnet", "XLNet"),
        ("xls_r", "XLS-R"),
        ("xlsr_wav2vec2", "XLSR-Wav2Vec2"),
        ("xmod", "X-MOD"),
        ("yolos", "YOLOS"),
        ("yoso", "YOSO"),
    ]
)


# Names of deprecated models. `model_type_to_module_name` prefixes a module
# name with `deprecated.` when it appears in this list.
# This is tied to the processing `-` -> `_` in `model_type_to_module_name`. For example, instead of putting
# `transfo-xl` (as in `CONFIG_MAPPING_NAMES`), we should use `transfo_xl`.
DEPRECATED_MODELS = [
    "bort",
    "deta",
    "efficientformer",
    "ernie_m",
    "gptsan_japanese",
    "graphormer",
    "jukebox",
    "mctct",
    "mega",
    "mmbt",
    "nat",
    "nezha",
    "open_llama",
    "qdqbert",
    "realm",
    "retribert",
    "speech_to_text_2",
    "tapex",
    "trajectory_transformer",
    "transfo_xl",
    "tvlt",
    "van",
    "vit_hybrid",
    "xlm_prophetnet",
]

# Model types whose module name is not simply the model type with `-`
# replaced by `_`. `model_type_to_module_name` consults this table first and
# uses the mapped value as the module name.
SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
    [
        ("openai-gpt", "openai"),
        ("data2vec-audio", "data2vec"),
        ("data2vec-text", "data2vec"),
        ("data2vec-vision", "data2vec"),
        ("donut-swin", "donut"),
        ("kosmos-2", "kosmos2"),
        ("maskformer-swin", "maskformer"),
        ("xclip", "x_clip"),
        ("clip_vision_model", "clip"),
        ("siglip_vision_model", "siglip"),
        ("chinese_clip_vision_model", "chinese_clip"),
        ("rt_detr_resnet", "rt_detr"),
    ]
)


def model_type_to_module_name(key):
    """
    Converts a config key to the corresponding module.

    Args:
        key (`str`): The model type, e.g. `"openai-gpt"` or `"transfo-xl"`.

    Returns:
        `str`: The module name. Types listed in `SPECIAL_MODEL_TYPE_TO_MODULE_NAME` use the name given there; any other
        type has its `-` replaced by `_`. If the resulting name is in `DEPRECATED_MODELS`, it is prefixed with
        `deprecated.`.
    """
    if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
        # Special treatment: the module name is spelled out explicitly.
        key = SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
    else:
        key = key.replace("-", "_")

    # The deprecation check applies to the resolved module name in both cases.
    if key in DEPRECATED_MODELS:
        key = f"deprecated.{key}"

    return key
656
657
658
659
660
661
662


def config_class_to_model_type(config):
    """
    Converts a config class name to the corresponding model type.

    Args:
        config (`str`): The name of a configuration class, e.g. `"BertConfig"`.

    Returns:
        `str` or `None`: The first model type whose config class carries that name, or `None` when no entry matches.
    """
    for key, cls in CONFIG_MAPPING_NAMES.items():
        if cls == config:
            return key
    # Not a built-in config: look through the configs registered at runtime. Those entries hold the class objects
    # themselves, so the comparison is done on `__name__`.
    for key, cls in CONFIG_MAPPING._extra_content.items():
        if cls.__name__ == config:
            return key
    return None


class _LazyConfigMapping(OrderedDict):
    """
    A dictionary that lazily load its values when they are requested.
    """

    def __init__(self, mapping):
        self._mapping = mapping
677
        self._extra_content = {}
678
679
680
        self._modules = {}

    def __getitem__(self, key):
681
682
        if key in self._extra_content:
            return self._extra_content[key]
683
684
685
686
687
688
        if key not in self._mapping:
            raise KeyError(key)
        value = self._mapping[key]
        module_name = model_type_to_module_name(key)
        if module_name not in self._modules:
            self._modules[module_name] = importlib.import_module(f".{module_name}", "transformers.models")
689
690
691
692
693
694
695
        if hasattr(self._modules[module_name], value):
            return getattr(self._modules[module_name], value)

        # Some of the mappings have entries model_type -> config of another model type. In that case we try to grab the
        # object at the top level.
        transformers_module = importlib.import_module("transformers")
        return getattr(transformers_module, value)
696
697

    def keys(self):
698
        return list(self._mapping.keys()) + list(self._extra_content.keys())
699
700

    def values(self):
701
        return [self[k] for k in self._mapping.keys()] + list(self._extra_content.values())
702

703
    def items(self):
704
        return [(k, self[k]) for k in self._mapping.keys()] + list(self._extra_content.items())
705
706

    def __iter__(self):
707
        return iter(list(self._mapping.keys()) + list(self._extra_content.keys()))
708
709

    def __contains__(self, item):
710
711
        return item in self._mapping or item in self._extra_content

712
    def register(self, key, value, exist_ok=False):
713
714
715
        """
        Register a new configuration in this mapping.
        """
716
        if key in self._mapping.keys() and not exist_ok:
717
718
            raise ValueError(f"'{key}' is already used by a Transformers config, pick another name.")
        self._extra_content[key] = value
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775


# Module-level mapping from model type to configuration class, built on `CONFIG_MAPPING_NAMES`. Config classes are
# imported on first lookup rather than when this module is loaded.
CONFIG_MAPPING = _LazyConfigMapping(CONFIG_MAPPING_NAMES)


class _LazyLoadAllMappings(OrderedDict):
    """
    A mapping that will load all pairs of key values at the first access (either by indexing, requestions keys, values,
    etc.)

    Args:
        mapping: The mapping to load.
    """

    def __init__(self, mapping):
        self._mapping = mapping
        self._initialized = False
        self._data = {}

    def _initialize(self):
        if self._initialized:
            return

        for model_type, map_name in self._mapping.items():
            module_name = model_type_to_module_name(model_type)
            module = importlib.import_module(f".{module_name}", "transformers.models")
            mapping = getattr(module, map_name)
            self._data.update(mapping)

        self._initialized = True

    def __getitem__(self, key):
        self._initialize()
        return self._data[key]

    def keys(self):
        self._initialize()
        return self._data.keys()

    def values(self):
        self._initialize()
        return self._data.values()

    def items(self):
        self._initialize()
        return self._data.keys()

    def __iter__(self):
        self._initialize()
        return iter(self._data)

    def __contains__(self, item):
        self._initialize()
        return item in self._data


def _get_class_name(model_class: Union[str, List[str]]):
776
    if isinstance(model_class, (list, tuple)):
Stas Bekman's avatar
Stas Bekman committed
777
778
        return " or ".join([f"[`{c}`]" for c in model_class if c is not None])
    return f"[`{model_class}`]"
779
780


781
782
783
784
785
def _list_model_options(indent, config_to_class=None, use_model_types=True):
    if config_to_class is None and not use_model_types:
        raise ValueError("Using `use_model_types=False` requires a `config_to_class` dictionary.")
    if use_model_types:
        if config_to_class is None:
Stas Bekman's avatar
Stas Bekman committed
786
            model_type_to_name = {model_type: f"[`{config}`]" for model_type, config in CONFIG_MAPPING_NAMES.items()}
787
788
        else:
            model_type_to_name = {
789
790
791
                model_type: _get_class_name(model_class)
                for model_type, model_class in config_to_class.items()
                if model_type in MODEL_NAMES_MAPPING
792
793
            }
        lines = [
794
            f"{indent}- **{model_type}** -- {model_type_to_name[model_type]} ({MODEL_NAMES_MAPPING[model_type]} model)"
795
            for model_type in sorted(model_type_to_name.keys())
796
797
        ]
    else:
798
799
800
801
802
        config_to_name = {
            CONFIG_MAPPING_NAMES[config]: _get_class_name(clas)
            for config, clas in config_to_class.items()
            if config in CONFIG_MAPPING_NAMES
        }
803
        config_to_model_name = {
804
            config: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING_NAMES.items()
805
806
        }
        lines = [
Sylvain Gugger's avatar
Sylvain Gugger committed
807
808
            f"{indent}- [`{config_name}`] configuration class:"
            f" {config_to_name[config_name]} ({config_to_model_name[config_name]} model)"
809
            for config_name in sorted(config_to_name.keys())
810
811
812
813
814
815
816
        ]
    return "\n".join(lines)


def replace_list_option_in_docstrings(config_to_class=None, use_model_types=True):
    """
    Returns a decorator that fills in the `List options` placeholder of the decorated function's docstring.

    The first docstring line consisting only of `List options` (surrounded by optional whitespace) is replaced by the
    output of `_list_model_options`, called with that line's indentation (plus four spaces when `use_model_types` is
    `True`) and with `config_to_class` and `use_model_types` forwarded unchanged.

    The decorator returns the function itself. A function without docstring is returned untouched; a docstring
    without placeholder raises a `ValueError`.
    """

    def docstring_decorator(fn):
        docstrings = fn.__doc__
        if docstrings is None:
            # Example: -OO
            return fn

        lines = docstrings.split("\n")
        for i, line in enumerate(lines):
            match = re.search(r"^(\s*)List options\s*$", line)
            if match is not None:
                break
        else:
            raise ValueError(
                f"The function {fn} should have an empty 'List options' in its docstring as placeholder, current"
                f" docstring is:\n{docstrings}"
            )

        indent = match.group(1)
        if use_model_types:
            indent = f"{indent}    "
        lines[i] = _list_model_options(indent, config_to_class=config_to_class, use_model_types=use_model_types)
        fn.__doc__ = "\n".join(lines)
        return fn

    return docstring_decorator


Julien Chaumond's avatar
Julien Chaumond committed
841
class AutoConfig:
    r"""
    This is a generic configuration class that will be instantiated as one of the configuration classes of the library
    when created with the [`~AutoConfig.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    def for_model(cls, model_type: str, *args, **kwargs):
        """
        Instantiate the configuration class registered for `model_type`.

        Args:
            model_type (`str`): A key of `CONFIG_MAPPING`.
            *args, **kwargs: Passed as-is to the constructor of the configuration class.

        Raises:
            ValueError: If `model_type` is not a key of `CONFIG_MAPPING`.
        """
        if model_type in CONFIG_MAPPING:
            config_class = CONFIG_MAPPING[model_type]
            return config_class(*args, **kwargs)
        raise ValueError(
            f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
        )

    @classmethod
    @replace_list_option_in_docstrings()
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the configuration classes of the library from a pretrained model configuration.

        The configuration class to instantiate is selected based on the `model_type` property of the config object that
        is loaded, or when it's missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a pretrained model configuration hosted inside a model repo on
                      huggingface.co.
                    - A path to a *directory* containing a configuration file saved using the
                      [`~PretrainedConfig.save_pretrained`] method, or the [`~PreTrainedModel.save_pretrained`] method,
                      e.g., `./my_model_directory/`.
                    - A path or url to a saved configuration JSON *file*, e.g.,
                      `./my_model_directory/configuration.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files and override
                the cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.

                If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            code_revision (`str`, *optional*):
                Forwarded as `code_revision` to `get_class_from_dynamic_module` when the configuration class is loaded
                from remote code.
            kwargs(additional keyword arguments, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        Examples:

        ```python
        >>> from transformers import AutoConfig

        >>> # Download configuration from huggingface.co and cache.
        >>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download configuration from huggingface.co (user-uploaded) and cache.
        >>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")

        >>> # Load a specific configuration file.
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")

        >>> # Change some config attributes when loading a pretrained config.
        >>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased", output_attentions=True, foo=False)
        >>> config.output_attentions
        True

        >>> config, unused_kwargs = AutoConfig.from_pretrained(
        ...     "google-bert/bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
        ... )
        >>> config.output_attentions
        True

        >>> unused_kwargs
        {'foo': False}
        ```"""
        # Backward compatibility: `use_auth_token` is the deprecated spelling of `token`.
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        kwargs["_from_auto"] = True
        kwargs["name_or_path"] = pretrained_model_name_or_path
        # These two are consumed here and must not be part of the kwargs forwarded below.
        trust_remote_code = kwargs.pop("trust_remote_code", None)
        code_revision = kwargs.pop("code_revision", None)

        config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
        has_remote_code = "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"]
        has_local_code = "model_type" in config_dict and config_dict["model_type"] in CONFIG_MAPPING
        trust_remote_code = resolve_trust_remote_code(
            trust_remote_code, pretrained_model_name_or_path, has_local_code, has_remote_code
        )

        if has_remote_code and trust_remote_code:
            # The config class is defined by code referenced in the checkpoint's `auto_map`.
            class_ref = config_dict["auto_map"]["AutoConfig"]
            config_class = get_class_from_dynamic_module(
                class_ref, pretrained_model_name_or_path, code_revision=code_revision, **kwargs
            )
            if os.path.isdir(pretrained_model_name_or_path):
                config_class.register_for_auto_class()
            return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif "model_type" in config_dict:
            try:
                config_class = CONFIG_MAPPING[config_dict["model_type"]]
            except KeyError:
                raise ValueError(
                    f"The checkpoint you are trying to load has model type `{config_dict['model_type']}` "
                    "but Transformers does not recognize this architecture. This could be because of an "
                    "issue with the checkpoint, or because your version of Transformers is out of date."
                )
            return config_class.from_dict(config_dict, **unused_kwargs)
        else:
            # Fallback: use pattern matching on the string.
            # We go from longer names to shorter names to catch roberta before bert (for instance)
            for pattern in sorted(CONFIG_MAPPING.keys(), key=len, reverse=True):
                if pattern in str(pretrained_model_name_or_path):
                    return CONFIG_MAPPING[pattern].from_dict(config_dict, **unused_kwargs)

        raise ValueError(
            f"Unrecognized model in {pretrained_model_name_or_path}. "
            f"Should have a `model_type` key in its {CONFIG_NAME}, or contain one of the following strings "
            f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
        )

    @staticmethod
    def register(model_type, config, exist_ok=False):
        """
        Register a new configuration for this class.

        Args:
            model_type (`str`): The model type like "bert" or "gpt".
            config ([`PretrainedConfig`]): The config to register.
            exist_ok (`bool`, *optional*, defaults to `False`):
                Passed to `CONFIG_MAPPING.register`; when `False`, a `model_type` that is already used by a built-in
                config is rejected.

        Raises:
            ValueError: If `config` is a [`PretrainedConfig`] subclass whose `model_type` attribute differs from
                `model_type`.
        """
        if issubclass(config, PretrainedConfig) and config.model_type != model_type:
            raise ValueError(
                "The config you are passing has a `model_type` attribute that is not consistent with the model type "
                f"you passed (config has {config.model_type} and you passed {model_type}). Fix one of those so they "
                "match!"
            )
        CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)