configuration_auto.py 37.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Sylvain Gugger's avatar
Sylvain Gugger committed
15
""" Auto Config class."""
16
import importlib
17
import os
18
import re
19
import warnings
20
from collections import OrderedDict
21
from typing import List, Union
22

Sylvain Gugger's avatar
Sylvain Gugger committed
23
from ...configuration_utils import PretrainedConfig
24
from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
25
from ...utils import CONFIG_NAME, logging
Aymeric Augustin's avatar
Aymeric Augustin committed
26

27

28
29
logger = logging.get_logger(__name__)

30
31
32
33

from ..deprecated._archive_maps import CONFIG_ARCHIVE_MAP_MAPPING_NAMES  # noqa: F401, E402


34
35
36
# Maps each model type (the key used to look a configuration up) to the *name* of its configuration class.
# Values are plain strings so that the class itself is only resolved when it is actually requested.
# Several model types intentionally share one configuration class (e.g. `gpt-sw3`/`gpt2`, `code_llama`/`llama`).
CONFIG_MAPPING_NAMES = OrderedDict(
    [
        # Add configs here
        ("albert", "AlbertConfig"),
        ("align", "AlignConfig"),
        ("altclip", "AltCLIPConfig"),
        ("audio-spectrogram-transformer", "ASTConfig"),
        ("autoformer", "AutoformerConfig"),
        ("bark", "BarkConfig"),
        ("bart", "BartConfig"),
        ("beit", "BeitConfig"),
        ("bert", "BertConfig"),
        ("bert-generation", "BertGenerationConfig"),
        ("big_bird", "BigBirdConfig"),
        ("bigbird_pegasus", "BigBirdPegasusConfig"),
        ("biogpt", "BioGptConfig"),
        ("bit", "BitConfig"),
        ("blenderbot", "BlenderbotConfig"),
        ("blenderbot-small", "BlenderbotSmallConfig"),
        ("blip", "BlipConfig"),
        ("blip-2", "Blip2Config"),
        ("bloom", "BloomConfig"),
        ("bridgetower", "BridgeTowerConfig"),
        ("bros", "BrosConfig"),
        ("camembert", "CamembertConfig"),
        ("canine", "CanineConfig"),
        ("chinese_clip", "ChineseCLIPConfig"),
        ("chinese_clip_vision_model", "ChineseCLIPVisionConfig"),
        ("clap", "ClapConfig"),
        ("clip", "CLIPConfig"),
        ("clip_vision_model", "CLIPVisionConfig"),
        ("clipseg", "CLIPSegConfig"),
        ("clvp", "ClvpConfig"),
        ("code_llama", "LlamaConfig"),
        ("codegen", "CodeGenConfig"),
        ("cohere", "CohereConfig"),
        ("conditional_detr", "ConditionalDetrConfig"),
        ("convbert", "ConvBertConfig"),
        ("convnext", "ConvNextConfig"),
        ("convnextv2", "ConvNextV2Config"),
        ("cpmant", "CpmAntConfig"),
        ("ctrl", "CTRLConfig"),
        ("cvt", "CvtConfig"),
        ("data2vec-audio", "Data2VecAudioConfig"),
        ("data2vec-text", "Data2VecTextConfig"),
        ("data2vec-vision", "Data2VecVisionConfig"),
        ("deberta", "DebertaConfig"),
        ("deberta-v2", "DebertaV2Config"),
        ("decision_transformer", "DecisionTransformerConfig"),
        ("deformable_detr", "DeformableDetrConfig"),
        ("deit", "DeiTConfig"),
        ("depth_anything", "DepthAnythingConfig"),
        ("deta", "DetaConfig"),
        ("detr", "DetrConfig"),
        ("dinat", "DinatConfig"),
        ("dinov2", "Dinov2Config"),
        ("distilbert", "DistilBertConfig"),
        ("donut-swin", "DonutSwinConfig"),
        ("dpr", "DPRConfig"),
        ("dpt", "DPTConfig"),
        ("efficientformer", "EfficientFormerConfig"),
        ("efficientnet", "EfficientNetConfig"),
        ("electra", "ElectraConfig"),
        ("encodec", "EncodecConfig"),
        ("encoder-decoder", "EncoderDecoderConfig"),
        ("ernie", "ErnieConfig"),
        ("ernie_m", "ErnieMConfig"),
        ("esm", "EsmConfig"),
        ("falcon", "FalconConfig"),
        ("fastspeech2_conformer", "FastSpeech2ConformerConfig"),
        ("flaubert", "FlaubertConfig"),
        ("flava", "FlavaConfig"),
        ("fnet", "FNetConfig"),
        ("focalnet", "FocalNetConfig"),
        ("fsmt", "FSMTConfig"),
        ("funnel", "FunnelConfig"),
        ("fuyu", "FuyuConfig"),
        ("gemma", "GemmaConfig"),
        ("git", "GitConfig"),
        ("glpn", "GLPNConfig"),
        ("gpt-sw3", "GPT2Config"),
        ("gpt2", "GPT2Config"),
        ("gpt_bigcode", "GPTBigCodeConfig"),
        ("gpt_neo", "GPTNeoConfig"),
        ("gpt_neox", "GPTNeoXConfig"),
        ("gpt_neox_japanese", "GPTNeoXJapaneseConfig"),
        ("gptj", "GPTJConfig"),
        ("gptsan-japanese", "GPTSanJapaneseConfig"),
        ("graphormer", "GraphormerConfig"),
        ("grounding-dino", "GroundingDinoConfig"),
        ("groupvit", "GroupViTConfig"),
        ("hubert", "HubertConfig"),
        ("ibert", "IBertConfig"),
        ("idefics", "IdeficsConfig"),
        ("idefics2", "Idefics2Config"),
        ("imagegpt", "ImageGPTConfig"),
        ("informer", "InformerConfig"),
        ("instructblip", "InstructBlipConfig"),
        ("jukebox", "JukeboxConfig"),
        ("kosmos-2", "Kosmos2Config"),
        ("layoutlm", "LayoutLMConfig"),
        ("layoutlmv2", "LayoutLMv2Config"),
        ("layoutlmv3", "LayoutLMv3Config"),
        ("led", "LEDConfig"),
        ("levit", "LevitConfig"),
        ("lilt", "LiltConfig"),
        ("llama", "LlamaConfig"),
        ("llava", "LlavaConfig"),
        ("llava_next", "LlavaNextConfig"),
        ("longformer", "LongformerConfig"),
        ("longt5", "LongT5Config"),
        ("luke", "LukeConfig"),
        ("lxmert", "LxmertConfig"),
        ("m2m_100", "M2M100Config"),
        ("mamba", "MambaConfig"),
        ("marian", "MarianConfig"),
        ("markuplm", "MarkupLMConfig"),
        ("mask2former", "Mask2FormerConfig"),
        ("maskformer", "MaskFormerConfig"),
        ("maskformer-swin", "MaskFormerSwinConfig"),
        ("mbart", "MBartConfig"),
        ("mctct", "MCTCTConfig"),
        ("mega", "MegaConfig"),
        ("megatron-bert", "MegatronBertConfig"),
        ("mgp-str", "MgpstrConfig"),
        ("mistral", "MistralConfig"),
        ("mixtral", "MixtralConfig"),
        ("mobilebert", "MobileBertConfig"),
        ("mobilenet_v1", "MobileNetV1Config"),
        ("mobilenet_v2", "MobileNetV2Config"),
        ("mobilevit", "MobileViTConfig"),
        ("mobilevitv2", "MobileViTV2Config"),
        ("mpnet", "MPNetConfig"),
        ("mpt", "MptConfig"),
        ("mra", "MraConfig"),
        ("mt5", "MT5Config"),
        ("musicgen", "MusicgenConfig"),
        ("musicgen_melody", "MusicgenMelodyConfig"),
        ("mvp", "MvpConfig"),
        ("nat", "NatConfig"),
        ("nezha", "NezhaConfig"),
        ("nllb-moe", "NllbMoeConfig"),
        ("nougat", "VisionEncoderDecoderConfig"),
        ("nystromformer", "NystromformerConfig"),
        ("olmo", "OlmoConfig"),
        ("oneformer", "OneFormerConfig"),
        ("open-llama", "OpenLlamaConfig"),
        ("openai-gpt", "OpenAIGPTConfig"),
        ("opt", "OPTConfig"),
        ("owlv2", "Owlv2Config"),
        ("owlvit", "OwlViTConfig"),
        ("patchtsmixer", "PatchTSMixerConfig"),
        ("patchtst", "PatchTSTConfig"),
        ("pegasus", "PegasusConfig"),
        ("pegasus_x", "PegasusXConfig"),
        ("perceiver", "PerceiverConfig"),
        ("persimmon", "PersimmonConfig"),
        ("phi", "PhiConfig"),
        ("pix2struct", "Pix2StructConfig"),
        ("plbart", "PLBartConfig"),
        ("poolformer", "PoolFormerConfig"),
        ("pop2piano", "Pop2PianoConfig"),
        ("prophetnet", "ProphetNetConfig"),
        ("pvt", "PvtConfig"),
        ("pvt_v2", "PvtV2Config"),
        ("qdqbert", "QDQBertConfig"),
        ("qwen2", "Qwen2Config"),
        ("qwen2_moe", "Qwen2MoeConfig"),
        ("rag", "RagConfig"),
        ("realm", "RealmConfig"),
        ("recurrent_gemma", "RecurrentGemmaConfig"),
        ("reformer", "ReformerConfig"),
        ("regnet", "RegNetConfig"),
        ("rembert", "RemBertConfig"),
        ("resnet", "ResNetConfig"),
        ("retribert", "RetriBertConfig"),
        ("roberta", "RobertaConfig"),
        ("roberta-prelayernorm", "RobertaPreLayerNormConfig"),
        ("roc_bert", "RoCBertConfig"),
        ("roformer", "RoFormerConfig"),
        ("rwkv", "RwkvConfig"),
        ("sam", "SamConfig"),
        ("seamless_m4t", "SeamlessM4TConfig"),
        ("seamless_m4t_v2", "SeamlessM4Tv2Config"),
        ("segformer", "SegformerConfig"),
        ("seggpt", "SegGptConfig"),
        ("sew", "SEWConfig"),
        ("sew-d", "SEWDConfig"),
        ("siglip", "SiglipConfig"),
        ("siglip_vision_model", "SiglipVisionConfig"),
        ("speech-encoder-decoder", "SpeechEncoderDecoderConfig"),
        ("speech_to_text", "Speech2TextConfig"),
        ("speech_to_text_2", "Speech2Text2Config"),
        ("speecht5", "SpeechT5Config"),
        ("splinter", "SplinterConfig"),
        ("squeezebert", "SqueezeBertConfig"),
        ("stablelm", "StableLmConfig"),
        ("starcoder2", "Starcoder2Config"),
        ("superpoint", "SuperPointConfig"),
        ("swiftformer", "SwiftFormerConfig"),
        ("swin", "SwinConfig"),
        ("swin2sr", "Swin2SRConfig"),
        ("swinv2", "Swinv2Config"),
        ("switch_transformers", "SwitchTransformersConfig"),
        ("t5", "T5Config"),
        ("table-transformer", "TableTransformerConfig"),
        ("tapas", "TapasConfig"),
        ("time_series_transformer", "TimeSeriesTransformerConfig"),
        ("timesformer", "TimesformerConfig"),
        ("timm_backbone", "TimmBackboneConfig"),
        ("trajectory_transformer", "TrajectoryTransformerConfig"),
        ("transfo-xl", "TransfoXLConfig"),
        ("trocr", "TrOCRConfig"),
        ("tvlt", "TvltConfig"),
        ("tvp", "TvpConfig"),
        ("udop", "UdopConfig"),
        ("umt5", "UMT5Config"),
        ("unispeech", "UniSpeechConfig"),
        ("unispeech-sat", "UniSpeechSatConfig"),
        ("univnet", "UnivNetConfig"),
        ("upernet", "UperNetConfig"),
        ("van", "VanConfig"),
        ("videomae", "VideoMAEConfig"),
        ("vilt", "ViltConfig"),
        ("vipllava", "VipLlavaConfig"),
        ("vision-encoder-decoder", "VisionEncoderDecoderConfig"),
        ("vision-text-dual-encoder", "VisionTextDualEncoderConfig"),
        ("visual_bert", "VisualBertConfig"),
        ("vit", "ViTConfig"),
        ("vit_hybrid", "ViTHybridConfig"),
        ("vit_mae", "ViTMAEConfig"),
        ("vit_msn", "ViTMSNConfig"),
        ("vitdet", "VitDetConfig"),
        ("vitmatte", "VitMatteConfig"),
        ("vits", "VitsConfig"),
        ("vivit", "VivitConfig"),
        ("wav2vec2", "Wav2Vec2Config"),
        ("wav2vec2-bert", "Wav2Vec2BertConfig"),
        ("wav2vec2-conformer", "Wav2Vec2ConformerConfig"),
        ("wavlm", "WavLMConfig"),
        ("whisper", "WhisperConfig"),
        ("xclip", "XCLIPConfig"),
        ("xglm", "XGLMConfig"),
        ("xlm", "XLMConfig"),
        ("xlm-prophetnet", "XLMProphetNetConfig"),
        ("xlm-roberta", "XLMRobertaConfig"),
        ("xlm-roberta-xl", "XLMRobertaXLConfig"),
        ("xlnet", "XLNetConfig"),
        ("xmod", "XmodConfig"),
        ("yolos", "YolosConfig"),
        ("yoso", "YosoConfig"),
    ]
)
287

amyeroberts's avatar
amyeroberts committed
288

289
290
# Maps each model type to its full, properly cased display name.
# This table also lists names that have no entry of their own in the config-class table
# (e.g. `barthez`, `bort`, `flan-t5`, `llama2`).
MODEL_NAMES_MAPPING = OrderedDict(
    [
        # Add full (and cased) model names here
        ("albert", "ALBERT"),
        ("align", "ALIGN"),
        ("altclip", "AltCLIP"),
        ("audio-spectrogram-transformer", "Audio Spectrogram Transformer"),
        ("autoformer", "Autoformer"),
        ("bark", "Bark"),
        ("bart", "BART"),
        ("barthez", "BARThez"),
        ("bartpho", "BARTpho"),
        ("beit", "BEiT"),
        ("bert", "BERT"),
        ("bert-generation", "Bert Generation"),
        ("bert-japanese", "BertJapanese"),
        ("bertweet", "BERTweet"),
        ("big_bird", "BigBird"),
        ("bigbird_pegasus", "BigBird-Pegasus"),
        ("biogpt", "BioGpt"),
        ("bit", "BiT"),
        ("blenderbot", "Blenderbot"),
        ("blenderbot-small", "BlenderbotSmall"),
        ("blip", "BLIP"),
        ("blip-2", "BLIP-2"),
        ("bloom", "BLOOM"),
        ("bort", "BORT"),
        ("bridgetower", "BridgeTower"),
        ("bros", "BROS"),
        ("byt5", "ByT5"),
        ("camembert", "CamemBERT"),
        ("canine", "CANINE"),
        ("chinese_clip", "Chinese-CLIP"),
        ("chinese_clip_vision_model", "ChineseCLIPVisionModel"),
        ("clap", "CLAP"),
        ("clip", "CLIP"),
        ("clip_vision_model", "CLIPVisionModel"),
        ("clipseg", "CLIPSeg"),
        ("clvp", "CLVP"),
        ("code_llama", "CodeLlama"),
        ("codegen", "CodeGen"),
        ("cohere", "Cohere"),
        ("conditional_detr", "Conditional DETR"),
        ("convbert", "ConvBERT"),
        ("convnext", "ConvNeXT"),
        ("convnextv2", "ConvNeXTV2"),
        ("cpm", "CPM"),
        ("cpmant", "CPM-Ant"),
        ("ctrl", "CTRL"),
        ("cvt", "CvT"),
        ("data2vec-audio", "Data2VecAudio"),
        ("data2vec-text", "Data2VecText"),
        ("data2vec-vision", "Data2VecVision"),
        ("deberta", "DeBERTa"),
        ("deberta-v2", "DeBERTa-v2"),
        ("decision_transformer", "Decision Transformer"),
        ("deformable_detr", "Deformable DETR"),
        ("deit", "DeiT"),
        ("deplot", "DePlot"),
        ("depth_anything", "Depth Anything"),
        ("deta", "DETA"),
        ("detr", "DETR"),
        ("dialogpt", "DialoGPT"),
        ("dinat", "DiNAT"),
        ("dinov2", "DINOv2"),
        ("distilbert", "DistilBERT"),
        ("dit", "DiT"),
        ("donut-swin", "DonutSwin"),
        ("dpr", "DPR"),
        ("dpt", "DPT"),
        ("efficientformer", "EfficientFormer"),
        ("efficientnet", "EfficientNet"),
        ("electra", "ELECTRA"),
        ("encodec", "EnCodec"),
        ("encoder-decoder", "Encoder decoder"),
        ("ernie", "ERNIE"),
        ("ernie_m", "ErnieM"),
        ("esm", "ESM"),
        ("falcon", "Falcon"),
        ("fastspeech2_conformer", "FastSpeech2Conformer"),
        ("flan-t5", "FLAN-T5"),
        ("flan-ul2", "FLAN-UL2"),
        ("flaubert", "FlauBERT"),
        ("flava", "FLAVA"),
        ("fnet", "FNet"),
        ("focalnet", "FocalNet"),
        ("fsmt", "FairSeq Machine-Translation"),
        ("funnel", "Funnel Transformer"),
        ("fuyu", "Fuyu"),
        ("gemma", "Gemma"),
        ("git", "GIT"),
        ("glpn", "GLPN"),
        ("gpt-sw3", "GPT-Sw3"),
        ("gpt2", "OpenAI GPT-2"),
        ("gpt_bigcode", "GPTBigCode"),
        ("gpt_neo", "GPT Neo"),
        ("gpt_neox", "GPT NeoX"),
        ("gpt_neox_japanese", "GPT NeoX Japanese"),
        ("gptj", "GPT-J"),
        ("gptsan-japanese", "GPTSAN-japanese"),
        ("graphormer", "Graphormer"),
        ("grounding-dino", "Grounding DINO"),
        ("groupvit", "GroupViT"),
        ("herbert", "HerBERT"),
        ("hubert", "Hubert"),
        ("ibert", "I-BERT"),
        ("idefics", "IDEFICS"),
        ("idefics2", "Idefics2"),
        ("imagegpt", "ImageGPT"),
        ("informer", "Informer"),
        ("instructblip", "InstructBLIP"),
        ("jukebox", "Jukebox"),
        ("kosmos-2", "KOSMOS-2"),
        ("layoutlm", "LayoutLM"),
        ("layoutlmv2", "LayoutLMv2"),
        ("layoutlmv3", "LayoutLMv3"),
        ("layoutxlm", "LayoutXLM"),
        ("led", "LED"),
        ("levit", "LeViT"),
        ("lilt", "LiLT"),
        ("llama", "LLaMA"),
        ("llama2", "Llama2"),
        ("llava", "LLaVa"),
        ("llava_next", "LLaVA-NeXT"),
        ("longformer", "Longformer"),
        ("longt5", "LongT5"),
        ("luke", "LUKE"),
        ("lxmert", "LXMERT"),
        ("m2m_100", "M2M100"),
        ("madlad-400", "MADLAD-400"),
        ("mamba", "Mamba"),
        ("marian", "Marian"),
        ("markuplm", "MarkupLM"),
        ("mask2former", "Mask2Former"),
        ("maskformer", "MaskFormer"),
        ("maskformer-swin", "MaskFormerSwin"),
        ("matcha", "MatCha"),
        ("mbart", "mBART"),
        ("mbart50", "mBART-50"),
        ("mctct", "M-CTC-T"),
        ("mega", "MEGA"),
        ("megatron-bert", "Megatron-BERT"),
        ("megatron_gpt2", "Megatron-GPT2"),
        ("mgp-str", "MGP-STR"),
        ("mistral", "Mistral"),
        ("mixtral", "Mixtral"),
        ("mluke", "mLUKE"),
        ("mms", "MMS"),
        ("mobilebert", "MobileBERT"),
        ("mobilenet_v1", "MobileNetV1"),
        ("mobilenet_v2", "MobileNetV2"),
        ("mobilevit", "MobileViT"),
        ("mobilevitv2", "MobileViTV2"),
        ("mpnet", "MPNet"),
        ("mpt", "MPT"),
        ("mra", "MRA"),
        ("mt5", "MT5"),
        ("musicgen", "MusicGen"),
        ("musicgen_melody", "MusicGen Melody"),
        ("mvp", "MVP"),
        ("nat", "NAT"),
        ("nezha", "Nezha"),
        ("nllb", "NLLB"),
        ("nllb-moe", "NLLB-MOE"),
        ("nougat", "Nougat"),
        ("nystromformer", "Nyströmformer"),
        ("olmo", "OLMo"),
        ("oneformer", "OneFormer"),
        ("open-llama", "OpenLlama"),
        ("openai-gpt", "OpenAI GPT"),
        ("opt", "OPT"),
        ("owlv2", "OWLv2"),
        ("owlvit", "OWL-ViT"),
        ("patchtsmixer", "PatchTSMixer"),
        ("patchtst", "PatchTST"),
        ("pegasus", "Pegasus"),
        ("pegasus_x", "PEGASUS-X"),
        ("perceiver", "Perceiver"),
        ("persimmon", "Persimmon"),
        ("phi", "Phi"),
        ("phobert", "PhoBERT"),
        ("pix2struct", "Pix2Struct"),
        ("plbart", "PLBart"),
        ("poolformer", "PoolFormer"),
        ("pop2piano", "Pop2Piano"),
        ("prophetnet", "ProphetNet"),
        ("pvt", "PVT"),
        ("pvt_v2", "PVTv2"),
        ("qdqbert", "QDQBert"),
        ("qwen2", "Qwen2"),
        ("qwen2_moe", "Qwen2MoE"),
        ("rag", "RAG"),
        ("realm", "REALM"),
        ("recurrent_gemma", "RecurrentGemma"),
        ("reformer", "Reformer"),
        ("regnet", "RegNet"),
        ("rembert", "RemBERT"),
        ("resnet", "ResNet"),
        ("retribert", "RetriBERT"),
        ("roberta", "RoBERTa"),
        ("roberta-prelayernorm", "RoBERTa-PreLayerNorm"),
        ("roc_bert", "RoCBert"),
        ("roformer", "RoFormer"),
        ("rwkv", "RWKV"),
        ("sam", "SAM"),
        ("seamless_m4t", "SeamlessM4T"),
        ("seamless_m4t_v2", "SeamlessM4Tv2"),
        ("segformer", "SegFormer"),
        ("seggpt", "SegGPT"),
        ("sew", "SEW"),
        ("sew-d", "SEW-D"),
        ("siglip", "SigLIP"),
        ("siglip_vision_model", "SiglipVisionModel"),
        ("speech-encoder-decoder", "Speech Encoder decoder"),
        ("speech_to_text", "Speech2Text"),
        ("speech_to_text_2", "Speech2Text2"),
        ("speecht5", "SpeechT5"),
        ("splinter", "Splinter"),
        ("squeezebert", "SqueezeBERT"),
        ("stablelm", "StableLm"),
        ("starcoder2", "Starcoder2"),
        ("superpoint", "SuperPoint"),
        ("swiftformer", "SwiftFormer"),
        ("swin", "Swin Transformer"),
        ("swin2sr", "Swin2SR"),
        ("swinv2", "Swin Transformer V2"),
        ("switch_transformers", "SwitchTransformers"),
        ("t5", "T5"),
        ("t5v1.1", "T5v1.1"),
        ("table-transformer", "Table Transformer"),
        ("tapas", "TAPAS"),
        ("tapex", "TAPEX"),
        ("time_series_transformer", "Time Series Transformer"),
        ("timesformer", "TimeSformer"),
        ("timm_backbone", "TimmBackbone"),
        ("trajectory_transformer", "Trajectory Transformer"),
        ("transfo-xl", "Transformer-XL"),
        ("trocr", "TrOCR"),
        ("tvlt", "TVLT"),
        ("tvp", "TVP"),
        ("udop", "UDOP"),
        ("ul2", "UL2"),
        ("umt5", "UMT5"),
        ("unispeech", "UniSpeech"),
        ("unispeech-sat", "UniSpeechSat"),
        ("univnet", "UnivNet"),
        ("upernet", "UPerNet"),
        ("van", "VAN"),
        ("videomae", "VideoMAE"),
        ("vilt", "ViLT"),
        ("vipllava", "VipLlava"),
        ("vision-encoder-decoder", "Vision Encoder decoder"),
        ("vision-text-dual-encoder", "VisionTextDualEncoder"),
        ("visual_bert", "VisualBERT"),
        ("vit", "ViT"),
        ("vit_hybrid", "ViT Hybrid"),
        ("vit_mae", "ViTMAE"),
        ("vit_msn", "ViTMSN"),
        ("vitdet", "VitDet"),
        ("vitmatte", "ViTMatte"),
        ("vits", "VITS"),
        ("vivit", "ViViT"),
        ("wav2vec2", "Wav2Vec2"),
        ("wav2vec2-bert", "Wav2Vec2-BERT"),
        ("wav2vec2-conformer", "Wav2Vec2-Conformer"),
        ("wav2vec2_phoneme", "Wav2Vec2Phoneme"),
        ("wavlm", "WavLM"),
        ("whisper", "Whisper"),
        ("xclip", "X-CLIP"),
        ("xglm", "XGLM"),
        ("xlm", "XLM"),
        ("xlm-prophetnet", "XLM-ProphetNet"),
        ("xlm-roberta", "XLM-RoBERTa"),
        ("xlm-roberta-xl", "XLM-RoBERTa-XL"),
        ("xlm-v", "XLM-V"),
        ("xlnet", "XLNet"),
        ("xls_r", "XLS-R"),
        ("xlsr_wav2vec2", "XLSR-Wav2Vec2"),
        ("xmod", "X-MOD"),
        ("yolos", "YOLOS"),
        ("yoso", "YOSO"),
    ]
)

Yih-Dar's avatar
Yih-Dar committed
573
574
# Names of models whose implementation lives under the `deprecated` sub-package.
# This is tied to the processing `-` -> `_` in `model_type_to_module_name`: membership is checked *after* that
# replacement, so entries must use underscores. For example, instead of putting `transfo-xl` (as in
# `CONFIG_MAPPING_NAMES`), we should use `transfo_xl`.
DEPRECATED_MODELS = [
    "bort",
    "mctct",
    "mmbt",
    "open_llama",
    "retribert",
    "tapex",
    "trajectory_transformer",
    "transfo_xl",
    "van",
]

587
# Model types whose module name is not simply the model type with `-` replaced by `_`.
# `model_type_to_module_name` consults this table first and returns the value unchanged.
SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
    [
        ("openai-gpt", "openai"),
        ("data2vec-audio", "data2vec"),
        ("data2vec-text", "data2vec"),
        ("data2vec-vision", "data2vec"),
        ("donut-swin", "donut"),
        ("kosmos-2", "kosmos2"),
        ("maskformer-swin", "maskformer"),
        ("xclip", "x_clip"),
        ("clip_vision_model", "clip"),
        ("siglip_vision_model", "siglip"),
        ("chinese_clip_vision_model", "chinese_clip"),
    ]
)
602
603
604
605
606
607
608
609


def model_type_to_module_name(key):
    """
    Converts a config key (model type) to the name of the corresponding module.

    Args:
        key (`str`):
            The model type, e.g. `"bert"` or `"transfo-xl"`.

    Returns:
        `str`: The module name. Entries of `SPECIAL_MODEL_TYPE_TO_MODULE_NAME` are returned as-is; otherwise every
        `-` is replaced by `_`, and the result is prefixed with `deprecated.` when it is listed in
        `DEPRECATED_MODELS`.
    """
    # Special treatment: these names cannot be derived mechanically from the model type.
    if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
        return SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]

    module_name = key.replace("-", "_")
    # The deprecation check deliberately runs on the underscore form (e.g. `transfo_xl`, not `transfo-xl`).
    if module_name in DEPRECATED_MODELS:
        return f"deprecated.{module_name}"

    return module_name
615
616
617
618
619
620
621


def config_class_to_model_type(config):
    """
    Converts a config class name to the corresponding model type.

    Args:
        config (`str`): The name of the configuration class.

    Returns:
        `str` or `None`: The first model type whose config class name equals `config`, searching
        `CONFIG_MAPPING_NAMES` first and the configs registered in `CONFIG_MAPPING` afterwards; `None` when there is
        no match.
    """
    for key, cls in CONFIG_MAPPING_NAMES.items():
        if cls == config:
            return key
    # if key not found check in extra content
    # (the extra content stores classes rather than class names, hence the `__name__` comparison)
    for key, cls in CONFIG_MAPPING._extra_content.items():
        if cls.__name__ == config:
            return key
    return None


class _LazyConfigMapping(OrderedDict):
    """
    A dictionary that lazily load its values when they are requested.
    """

    def __init__(self, mapping):
        self._mapping = mapping
636
        self._extra_content = {}
637
638
639
        self._modules = {}

    def __getitem__(self, key):
640
641
        if key in self._extra_content:
            return self._extra_content[key]
642
643
644
645
646
647
        if key not in self._mapping:
            raise KeyError(key)
        value = self._mapping[key]
        module_name = model_type_to_module_name(key)
        if module_name not in self._modules:
            self._modules[module_name] = importlib.import_module(f".{module_name}", "transformers.models")
648
649
650
651
652
653
654
        if hasattr(self._modules[module_name], value):
            return getattr(self._modules[module_name], value)

        # Some of the mappings have entries model_type -> config of another model type. In that case we try to grab the
        # object at the top level.
        transformers_module = importlib.import_module("transformers")
        return getattr(transformers_module, value)
655
656

    def keys(self):
657
        return list(self._mapping.keys()) + list(self._extra_content.keys())
658
659

    def values(self):
660
        return [self[k] for k in self._mapping.keys()] + list(self._extra_content.values())
661

662
    def items(self):
663
        return [(k, self[k]) for k in self._mapping.keys()] + list(self._extra_content.items())
664
665

    def __iter__(self):
666
        return iter(list(self._mapping.keys()) + list(self._extra_content.keys()))
667
668

    def __contains__(self, item):
669
670
        return item in self._mapping or item in self._extra_content

671
    def register(self, key, value, exist_ok=False):
672
673
674
        """
        Register a new configuration in this mapping.
        """
675
        if key in self._mapping.keys() and not exist_ok:
676
677
            raise ValueError(f"'{key}' is already used by a Transformers config, pick another name.")
        self._extra_content[key] = value
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734


# Module-level mapping from model type to config class, built on `CONFIG_MAPPING_NAMES`; the config classes are only
# resolved when a key is accessed.
CONFIG_MAPPING = _LazyConfigMapping(CONFIG_MAPPING_NAMES)


class _LazyLoadAllMappings(OrderedDict):
    """
    A mapping that will load all pairs of key values at the first access (either by indexing, requestions keys, values,
    etc.)

    Args:
        mapping: The mapping to load.
    """

    def __init__(self, mapping):
        self._mapping = mapping
        self._initialized = False
        self._data = {}

    def _initialize(self):
        if self._initialized:
            return

        for model_type, map_name in self._mapping.items():
            module_name = model_type_to_module_name(model_type)
            module = importlib.import_module(f".{module_name}", "transformers.models")
            mapping = getattr(module, map_name)
            self._data.update(mapping)

        self._initialized = True

    def __getitem__(self, key):
        self._initialize()
        return self._data[key]

    def keys(self):
        self._initialize()
        return self._data.keys()

    def values(self):
        self._initialize()
        return self._data.values()

    def items(self):
        self._initialize()
        return self._data.keys()

    def __iter__(self):
        self._initialize()
        return iter(self._data)

    def __contains__(self, item):
        self._initialize()
        return item in self._data


def _get_class_name(model_class: Union[str, List[str]]):
735
    if isinstance(model_class, (list, tuple)):
Stas Bekman's avatar
Stas Bekman committed
736
737
        return " or ".join([f"[`{c}`]" for c in model_class if c is not None])
    return f"[`{model_class}`]"
738
739


740
741
742
743
744
def _list_model_options(indent, config_to_class=None, use_model_types=True):
    if config_to_class is None and not use_model_types:
        raise ValueError("Using `use_model_types=False` requires a `config_to_class` dictionary.")
    if use_model_types:
        if config_to_class is None:
Stas Bekman's avatar
Stas Bekman committed
745
            model_type_to_name = {model_type: f"[`{config}`]" for model_type, config in CONFIG_MAPPING_NAMES.items()}
746
747
        else:
            model_type_to_name = {
748
749
750
                model_type: _get_class_name(model_class)
                for model_type, model_class in config_to_class.items()
                if model_type in MODEL_NAMES_MAPPING
751
752
            }
        lines = [
753
            f"{indent}- **{model_type}** -- {model_type_to_name[model_type]} ({MODEL_NAMES_MAPPING[model_type]} model)"
754
            for model_type in sorted(model_type_to_name.keys())
755
756
        ]
    else:
757
758
759
760
761
        config_to_name = {
            CONFIG_MAPPING_NAMES[config]: _get_class_name(clas)
            for config, clas in config_to_class.items()
            if config in CONFIG_MAPPING_NAMES
        }
762
        config_to_model_name = {
763
            config: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING_NAMES.items()
764
765
        }
        lines = [
Sylvain Gugger's avatar
Sylvain Gugger committed
766
767
            f"{indent}- [`{config_name}`] configuration class:"
            f" {config_to_name[config_name]} ({config_to_model_name[config_name]} model)"
768
            for config_name in sorted(config_to_name.keys())
769
770
771
772
773
774
775
        ]
    return "\n".join(lines)


def replace_list_option_in_docstrings(config_to_class=None, use_model_types=True):
    """
    Returns a decorator that replaces the `List options` placeholder line of a docstring by the list of models.

    Args:
        config_to_class (`dict`, *optional*): Forwarded to `_list_model_options`.
        use_model_types (`bool`, *optional*, defaults to `True`): Forwarded to `_list_model_options`.

    Returns:
        A decorator that updates `fn.__doc__` in place and returns `fn`. Functions without docstring are returned
        untouched; a docstring without a `List options` line makes the decorator raise a `ValueError`.
    """

    def docstring_decorator(fn):
        docstrings = fn.__doc__
        if docstrings is None:
            # Example: -OO
            return fn
        lines = docstrings.split("\n")
        # Look for the first line made only of the placeholder, keeping the match to read its indentation.
        for i, line in enumerate(lines):
            placeholder = re.search(r"^(\s*)List options\s*$", line)
            if placeholder is not None:
                break
        else:
            raise ValueError(
                f"The function {fn} should have an empty 'List options' in its docstring as placeholder, current"
                f" docstring is:\n{docstrings}"
            )
        indent = placeholder.groups()[0]
        if use_model_types:
            indent = f"{indent}    "
        lines[i] = _list_model_options(indent, config_to_class=config_to_class, use_model_types=use_model_types)
        fn.__doc__ = "\n".join(lines)
        return fn

    return docstring_decorator


Julien Chaumond's avatar
Julien Chaumond committed
800
class AutoConfig:
    r"""
    This is a generic configuration class that will be instantiated as one of the configuration classes of the library
    when created with the [`~AutoConfig.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    def for_model(cls, model_type: str, *args, **kwargs):
        """
        Instantiate the configuration class registered for `model_type`.

        Args:
            model_type (`str`): A key of `CONFIG_MAPPING`.
            *args, **kwargs: Passed as is to the constructor of the configuration class.

        Raises:
            ValueError: If `model_type` is not in `CONFIG_MAPPING`.
        """
        if model_type in CONFIG_MAPPING:
            config_class = CONFIG_MAPPING[model_type]
            return config_class(*args, **kwargs)
        raise ValueError(
            f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
        )

    @classmethod
    @replace_list_option_in_docstrings()
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the configuration classes of the library from a pretrained model configuration.

        The configuration class to instantiate is selected based on the `model_type` property of the config object that
        is loaded, or when it's missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a pretrained model configuration hosted inside a model repo on
                      huggingface.co.
                    - A path to a *directory* containing a configuration file saved using the
                      [`~PretrainedConfig.save_pretrained`] method, or the [`~PreTrainedModel.save_pretrained`] method,
                      e.g., `./my_model_directory/`.
                    - A path or url to a saved configuration JSON *file*, e.g.,
                      `./my_model_directory/configuration.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.

                If `True`, then this functions returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            code_revision (`str`, *optional*):
                Forwarded to `get_class_from_dynamic_module` when the configuration class is loaded from remote code.
            kwargs(additional keyword arguments, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        Examples:

        ```python
        >>> from transformers import AutoConfig

        >>> # Download configuration from huggingface.co and cache.
        >>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download configuration from huggingface.co (user-uploaded) and cache.
        >>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")

        >>> # Load a specific configuration file.
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")

        >>> # Change some config attributes when loading a pretrained config.
        >>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased", output_attentions=True, foo=False)
        >>> config.output_attentions
        True

        >>> config, unused_kwargs = AutoConfig.from_pretrained(
        ...     "google-bert/bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
        ... )
        >>> config.output_attentions
        True

        >>> unused_kwargs
        {'foo': False}
        ```"""
        # Backward compatibility: map the deprecated `use_auth_token` onto `token`.
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        kwargs["_from_auto"] = True
        kwargs["name_or_path"] = pretrained_model_name_or_path
        # These two are consumed here and must not reach `get_config_dict`.
        trust_remote_code = kwargs.pop("trust_remote_code", None)
        code_revision = kwargs.pop("code_revision", None)

        config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
        has_remote_code = "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"]
        has_local_code = "model_type" in config_dict and config_dict["model_type"] in CONFIG_MAPPING
        trust_remote_code = resolve_trust_remote_code(
            trust_remote_code, pretrained_model_name_or_path, has_local_code, has_remote_code
        )

        if has_remote_code and trust_remote_code:
            class_ref = config_dict["auto_map"]["AutoConfig"]
            config_class = get_class_from_dynamic_module(
                class_ref, pretrained_model_name_or_path, code_revision=code_revision, **kwargs
            )
            if os.path.isdir(pretrained_model_name_or_path):
                config_class.register_for_auto_class()
            return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif "model_type" in config_dict:
            try:
                config_class = CONFIG_MAPPING[config_dict["model_type"]]
            except KeyError:
                raise ValueError(
                    f"The checkpoint you are trying to load has model type `{config_dict['model_type']}` "
                    "but Transformers does not recognize this architecture. This could be because of an "
                    "issue with the checkpoint, or because your version of Transformers is out of date."
                )
            return config_class.from_dict(config_dict, **unused_kwargs)
        else:
            # Fallback: use pattern matching on the string.
            # We go from longer names to shorter names to catch roberta before bert (for instance)
            for pattern in sorted(CONFIG_MAPPING.keys(), key=len, reverse=True):
                if pattern in str(pretrained_model_name_or_path):
                    return CONFIG_MAPPING[pattern].from_dict(config_dict, **unused_kwargs)

        raise ValueError(
            f"Unrecognized model in {pretrained_model_name_or_path}. "
            f"Should have a `model_type` key in its {CONFIG_NAME}, or contain one of the following strings "
            f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
        )

    @staticmethod
    def register(model_type, config, exist_ok=False):
        """
        Register a new configuration for this class.

        Args:
            model_type (`str`): The model type like "bert" or "gpt".
            config ([`PretrainedConfig`]): The config to register.
            exist_ok (`bool`, *optional*, defaults to `False`):
                Passed along to `CONFIG_MAPPING.register`.

        Raises:
            ValueError: If `config` is a [`PretrainedConfig`] subclass whose `model_type` attribute differs from
                `model_type`.
        """
        if issubclass(config, PretrainedConfig) and config.model_type != model_type:
            raise ValueError(
                "The config you are passing has a `model_type` attribute that is not consistent with the model type "
                f"you passed (config has {config.model_type} and you passed {model_type}). Fix one of those so they "
                "match!"
            )
        CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)
978
979
980


# Aggregated archive map: the per-model maps named in `CONFIG_ARCHIVE_MAP_MAPPING_NAMES` are only imported and merged
# the first time this object is accessed.
ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = _LazyLoadAllMappings(CONFIG_ARCHIVE_MAP_MAPPING_NAMES)