"vscode:/vscode.git/clone" did not exist on "b8112eddecfd524038e3c10970c06a444a32aa9d"
configuration_utils.py 45.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Configuration base class and utilities."""


import copy
import json
import os
import re
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

from packaging import version
from requests import HTTPError

from . import __version__
from .dynamic_module_utils import custom_object_save
from .file_utils import (
    CONFIG_NAME,
    EntryNotFoundError,
    PushToHubMixin,
    RepositoryNotFoundError,
    RevisionNotFoundError,
    cached_path,
    copy_func,
    hf_bucket_url,
    is_offline_mode,
    is_remote_url,
    is_torch_available,
)
from .utils import logging


logger = logging.get_logger(__name__)

_re_configuration_file = re.compile(r"config\.(.*)\.json")


class PretrainedConfig(PushToHubMixin):
    r"""
    Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as
    methods for loading/downloading/saving configurations.

    <Tip>

    A configuration file can be loaded and saved to disk. Loading the configuration file and using this file to
    initialize a model does **not** load the model weights. It only affects the model's configuration.

    </Tip>

    Class attributes (overridden by derived classes):

    - **model_type** (`str`) -- An identifier for the model type, serialized into the JSON file, and used to recreate
      the correct object in [`~transformers.AutoConfig`].
    - **is_composition** (`bool`) -- Whether the config class is composed of multiple sub-configs. In this case the
      config has to be initialized from two or more configs of type [`~transformers.PretrainedConfig`] like:
      [`~transformers.EncoderDecoderConfig`] or [`~RagConfig`].
    - **keys_to_ignore_at_inference** (`List[str]`) -- A list of keys to ignore by default when looking at dictionary
      outputs of the model during inference.
    - **attribute_map** (`Dict[str, str]`) -- A dict that maps model specific attribute names to the standardized
      naming of attributes.

    Common attributes (present in all subclasses):

    - **vocab_size** (`int`) -- The number of tokens in the vocabulary, which is also the first dimension of the
      embeddings matrix (this attribute may be missing for models that don't have a text modality like ViT).
    - **hidden_size** (`int`) -- The hidden size of the model.
    - **num_attention_heads** (`int`) -- The number of attention heads used in the multi-head attention layers of the
      model.
    - **num_hidden_layers** (`int`) -- The number of blocks in the model.

    Args:
        name_or_path (`str`, *optional*, defaults to `""`):
            Store the string that was passed to [`PreTrainedModel.from_pretrained`] or
            [`TFPreTrainedModel.from_pretrained`] as `pretrained_model_name_or_path` if the configuration was created
            with such a method.
        output_hidden_states (`bool`, *optional*, defaults to `False`):
            Whether or not the model should return all hidden-states.
        output_attentions (`bool`, *optional*, defaults to `False`):
            Whether or not the model should return all attentions.
        return_dict (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return a [`~transformers.file_utils.ModelOutput`] instead of a plain tuple.
        is_encoder_decoder (`bool`, *optional*, defaults to `False`):
            Whether the model is used as an encoder/decoder or not.
        is_decoder (`bool`, *optional*, defaults to `False`):
            Whether the model is used as decoder or not (in which case it's used as an encoder).
        cross_attention_hidden_size (`int`, *optional*):
            The hidden size of the cross-attention layer in case the model is used as a decoder in an encoder-decoder
            setting and the cross-attention hidden dimension differs from `self.config.hidden_size`.
        add_cross_attention (`bool`, *optional*, defaults to `False`):
            Whether cross-attention layers should be added to the model. Note, this option is only relevant for models
            that can be used as decoder models within the [`EncoderDecoderModel`] class, which consists of all models
            in `AUTO_MODELS_FOR_CAUSAL_LM`.
        tie_encoder_decoder (`bool`, *optional*, defaults to `False`):
            Whether all encoder weights should be tied to their equivalent decoder weights. This requires the encoder
            and decoder model to have the exact same parameter names.
        prune_heads (`Dict[int, List[int]]`, *optional*, defaults to `{}`):
            Pruned heads of the model. The keys are the selected layer indices and the associated values, the list of
            heads to prune in said layer.

            For instance `{1: [0, 2], 2: [2, 3]}` will prune heads 0 and 2 on layer 1 and heads 2 and 3 on layer 2.
        chunk_size_feed_forward (`int`, *optional*, defaults to `0`):
            The chunk size of all feed forward layers in the residual attention blocks. A chunk size of `0` means that
            the feed forward layer is not chunked. A chunk size of n means that the feed forward layer processes `n` <
            sequence_length embeddings at a time. For more information on feed forward chunking, see [How does Feed
            Forward Chunking work?](../glossary.html#feed-forward-chunking).

        > Parameters for sequence generation

        max_length (`int`, *optional*, defaults to 20):
            Maximum length that will be used by default in the `generate` method of the model.
        min_length (`int`, *optional*, defaults to 0):
            Minimum length that will be used by default in the `generate` method of the model.
        do_sample (`bool`, *optional*, defaults to `False`):
            Flag that will be used by default in the `generate` method of the model. Whether or not to use sampling;
            use greedy decoding otherwise.
        early_stopping (`bool`, *optional*, defaults to `False`):
            Flag that will be used by default in the `generate` method of the model. Whether to stop the beam search
            when at least `num_beams` sentences are finished per batch or not.
        num_beams (`int`, *optional*, defaults to 1):
            Number of beams for beam search that will be used by default in the `generate` method of the model. 1 means
            no beam search.
        num_beam_groups (`int`, *optional*, defaults to 1):
            Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams
            that will be used by default in the `generate` method of the model. 1 means no group beam search.
        diversity_penalty (`float`, *optional*, defaults to 0.0):
            Value to control diversity for group beam search that will be used by default in the `generate` method of
            the model. 0 means no diversity penalty. The higher the penalty, the more diverse are the outputs.
        temperature (`float`, *optional*, defaults to 1.0):
            The value used to modulate the next token probabilities that will be used by default in the `generate`
            method of the model. Must be strictly positive.
        top_k (`int`, *optional*, defaults to 50):
            Number of highest probability vocabulary tokens to keep for top-k-filtering that will be used by default in
            the `generate` method of the model.
        top_p (`float`, *optional*, defaults to 1.0):
            Value that will be used by default in the `generate` method of the model for `top_p`. If set to float < 1,
            only the most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.
        repetition_penalty (`float`, *optional*, defaults to 1.0):
            Parameter for repetition penalty that will be used by default in the `generate` method of the model. 1.0
            means no penalty.
        length_penalty (`float`, *optional*, defaults to 1.0):
            Exponential penalty to the length that will be used by default in the `generate` method of the model.
        no_repeat_ngram_size (`int`, *optional*, defaults to 0):
            Value that will be used by default in the `generate` method of the model for `no_repeat_ngram_size`. If set
            to int > 0, all ngrams of that size can only occur once.
        encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
            Value that will be used by default in the `generate` method of the model for
            `encoder_no_repeat_ngram_size`. If set to int > 0, all ngrams of that size that occur in the
            `encoder_input_ids` cannot occur in the `decoder_input_ids`.
        bad_words_ids (`List[int]`, *optional*):
            List of token ids that are not allowed to be generated that will be used by default in the `generate`
            method of the model. In order to get the tokens of the words that should not appear in the generated text,
            use `tokenizer.encode(bad_word, add_prefix_space=True)`.
        num_return_sequences (`int`, *optional*, defaults to 1):
            Number of independently computed returned sequences for each element in the batch that will be used by
            default in the `generate` method of the model.
        output_scores (`bool`, *optional*, defaults to `False`):
            Whether the model should return the logits when used for generation.
        return_dict_in_generate (`bool`, *optional*, defaults to `False`):
            Whether the model should return a [`~transformers.file_utils.ModelOutput`] instead of a `torch.LongTensor`.
        forced_bos_token_id (`int`, *optional*):
            The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
            multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
            language token.
        forced_eos_token_id (`int`, *optional*):
            The id of the token to force as the last generated token when `max_length` is reached.
        remove_invalid_values (`bool`, *optional*):
            Whether to remove possible _nan_ and _inf_ outputs of the model to prevent the generation method from
            crashing. Note that using `remove_invalid_values` can slow down generation.

        > Parameters for fine-tuning tasks

        architectures (`List[str]`, *optional*):
            Model architectures that can be used with the model pretrained weights.
        finetuning_task (`str`, *optional*):
            Name of the task used to fine-tune the model. This can be used when converting from an original (TensorFlow
            or PyTorch) checkpoint.
        id2label (`Dict[int, str]`, *optional*):
            A map from index (for instance prediction index, or target index) to label.
        label2id (`Dict[str, int]`, *optional*): A map from label to index for the model.
        num_labels (`int`, *optional*):
            Number of labels to use in the last layer added to the model, typically for a classification task.
        task_specific_params (`Dict[str, Any]`, *optional*):
            Additional keyword arguments to store for the current task.
        problem_type (`str`, *optional*):
            Problem type for `XxxForSequenceClassification` models. Can be one of `"regression"`,
            `"single_label_classification"` or `"multi_label_classification"`.

        > Parameters linked to the tokenizer

        tokenizer_class (`str`, *optional*):
            The name of the associated tokenizer class to use (if none is set, will use the tokenizer associated to the
            model by default).
        prefix (`str`, *optional*):
            A specific prompt that should be added at the beginning of each text before calling the model.
        bos_token_id (`int`, *optional*): The id of the _beginning-of-stream_ token.
        pad_token_id (`int`, *optional*): The id of the _padding_ token.
        eos_token_id (`int`, *optional*): The id of the _end-of-stream_ token.
        decoder_start_token_id (`int`, *optional*):
            If an encoder-decoder model starts decoding with a different token than _bos_, the id of that token.
        sep_token_id (`int`, *optional*): The id of the _separation_ token.

        > PyTorch specific parameters

        torchscript (`bool`, *optional*, defaults to `False`):
            Whether or not the model should be used with Torchscript.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether the model's input and output word embeddings should be tied. Note that this is only relevant if the
            model has an output word embedding layer.
        torch_dtype (`str`, *optional*):
            The `dtype` of the weights. This attribute can be used to initialize the model to a non-default `dtype`
            (which is normally `float32`) and thus allow for optimal storage allocation. For example, if the saved
            model is `float16`, ideally we want to load it back using the minimal amount of memory needed to load
            `float16` weights. Since the config object is stored in plain text, this attribute contains just the
            floating type string without the `torch.` prefix. For example, for `torch.float16`, `torch_dtype` is the
            `"float16"` string.

            This attribute is currently not being used during model loading time, but this may change in future
            versions. But we can already start preparing for the future by saving the dtype with `save_pretrained`.

        > TensorFlow specific parameters

        use_bfloat16 (`bool`, *optional*, defaults to `False`):
            Whether or not the model should use BFloat16 scalars (only used by some TensorFlow models).
    """
    model_type: str = ""
    is_composition: bool = False
    attribute_map: Dict[str, str] = {}
    _auto_class: Optional[str] = None

    def __setattr__(self, key, value):
        if key in super().__getattribute__("attribute_map"):
            key = super().__getattribute__("attribute_map")[key]
        super().__setattr__(key, value)

    def __getattribute__(self, key):
        if key != "attribute_map" and key in super().__getattribute__("attribute_map"):
            key = super().__getattribute__("attribute_map")[key]
        return super().__getattribute__(key)
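    # Illustrative note (not from the original source): the two overrides above make `attribute_map`
    # aliases transparent. Assuming a hypothetical subclass with
    # `attribute_map = {"n_embd": "hidden_size"}`, the following would hold:
    #
    #     config = SomeConfig(hidden_size=768)  # `SomeConfig` is an assumed example subclass
    #     config.n_embd = 1024                  # __setattr__ redirects the write to `hidden_size`
    #     assert config.n_embd == 1024          # __getattribute__ redirects the read as well
    #     assert config.hidden_size == 1024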

    def __init__(self, **kwargs):
        # Attributes with defaults
        self.return_dict = kwargs.pop("return_dict", True)
        self.output_hidden_states = kwargs.pop("output_hidden_states", False)
        self.output_attentions = kwargs.pop("output_attentions", False)
        self.torchscript = kwargs.pop("torchscript", False)  # Only used by PyTorch models
        self.torch_dtype = kwargs.pop("torch_dtype", None)  # Only used by PyTorch models
        self.use_bfloat16 = kwargs.pop("use_bfloat16", False)
        self.pruned_heads = kwargs.pop("pruned_heads", {})
        self.tie_word_embeddings = kwargs.pop(
            "tie_word_embeddings", True
        )  # Whether input and output word embeddings should be tied for all MLM, LM and Seq2Seq models.

        # `is_decoder` is used in encoder-decoder models to differentiate encoder from decoder
        self.is_encoder_decoder = kwargs.pop("is_encoder_decoder", False)
        self.is_decoder = kwargs.pop("is_decoder", False)
        self.cross_attention_hidden_size = kwargs.pop("cross_attention_hidden_size", None)
        self.add_cross_attention = kwargs.pop("add_cross_attention", False)
        self.tie_encoder_decoder = kwargs.pop("tie_encoder_decoder", False)

        # Parameters for sequence generation
        self.max_length = kwargs.pop("max_length", 20)
        self.min_length = kwargs.pop("min_length", 0)
        self.do_sample = kwargs.pop("do_sample", False)
        self.early_stopping = kwargs.pop("early_stopping", False)
        self.num_beams = kwargs.pop("num_beams", 1)
        self.num_beam_groups = kwargs.pop("num_beam_groups", 1)
        self.diversity_penalty = kwargs.pop("diversity_penalty", 0.0)
        self.temperature = kwargs.pop("temperature", 1.0)
        self.top_k = kwargs.pop("top_k", 50)
        self.top_p = kwargs.pop("top_p", 1.0)
        self.typical_p = kwargs.pop("typical_p", 1.0)
        self.repetition_penalty = kwargs.pop("repetition_penalty", 1.0)
        self.length_penalty = kwargs.pop("length_penalty", 1.0)
        self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", 0)
        self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", 0)
        self.bad_words_ids = kwargs.pop("bad_words_ids", None)
        self.num_return_sequences = kwargs.pop("num_return_sequences", 1)
        self.chunk_size_feed_forward = kwargs.pop("chunk_size_feed_forward", 0)
        self.output_scores = kwargs.pop("output_scores", False)
        self.return_dict_in_generate = kwargs.pop("return_dict_in_generate", False)
        self.forced_bos_token_id = kwargs.pop("forced_bos_token_id", None)
        self.forced_eos_token_id = kwargs.pop("forced_eos_token_id", None)
        self.remove_invalid_values = kwargs.pop("remove_invalid_values", False)

        # Fine-tuning task arguments
        self.architectures = kwargs.pop("architectures", None)
        self.finetuning_task = kwargs.pop("finetuning_task", None)
        self.id2label = kwargs.pop("id2label", None)
        self.label2id = kwargs.pop("label2id", None)
        if self.id2label is not None:
            kwargs.pop("num_labels", None)
            # Keys are always strings in JSON so convert ids to int here.
            self.id2label = dict((int(key), value) for key, value in self.id2label.items())
        else:
            self.num_labels = kwargs.pop("num_labels", 2)

        if self.torch_dtype is not None and isinstance(self.torch_dtype, str):
            # we will start using self.torch_dtype in v5, but to be consistent with
            # from_pretrained's torch_dtype arg convert it to an actual torch.dtype object
            if is_torch_available():
                import torch

                self.torch_dtype = getattr(torch, self.torch_dtype)

        # Tokenizer arguments TODO: eventually tokenizer and models should share the same config
        self.tokenizer_class = kwargs.pop("tokenizer_class", None)
        self.prefix = kwargs.pop("prefix", None)
        self.bos_token_id = kwargs.pop("bos_token_id", None)
        self.pad_token_id = kwargs.pop("pad_token_id", None)
        self.eos_token_id = kwargs.pop("eos_token_id", None)
        self.sep_token_id = kwargs.pop("sep_token_id", None)

        self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)

        # task specific arguments
        self.task_specific_params = kwargs.pop("task_specific_params", None)

        # regression / multi-label classification
        self.problem_type = kwargs.pop("problem_type", None)
        allowed_problem_types = ("regression", "single_label_classification", "multi_label_classification")
        if self.problem_type is not None and self.problem_type not in allowed_problem_types:
            raise ValueError(
                f"The config parameter `problem_type` was not understood: received {self.problem_type} "
                "but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
            )

        # TPU arguments
        if kwargs.pop("xla_device", None) is not None:
            logger.warning(
                "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can "
                "safely remove it from your `config.json` file."
            )

        # Name or path to the pretrained checkpoint
        self._name_or_path = str(kwargs.pop("name_or_path", ""))

        # Drop the transformers version info
        self.transformers_version = kwargs.pop("transformers_version", None)

        # Deal with gradient checkpointing
        if kwargs.get("gradient_checkpointing", False):
            warnings.warn(
                "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "
                "of Transformers. Use `model.gradient_checkpointing_enable()` instead, or, if you are using the "
                "`Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`."
            )

        # Additional attributes without default values
        for key, value in kwargs.items():
            try:
                setattr(self, key, value)
            except AttributeError as err:
                logger.error(f"Can't set {key} with value {value} for {self}")
                raise err

    @property
    def name_or_path(self) -> str:
        return getattr(self, "_name_or_path", None)

    @name_or_path.setter
    def name_or_path(self, value):
        self._name_or_path = str(value)  # Make sure that name_or_path is a string (for JSON encoding)

    @property
    def use_return_dict(self) -> bool:
        """
        `bool`: Whether or not to return [`~file_utils.ModelOutput`] instead of tuples.
        """
        # If torchscript is set, force `return_dict=False` to avoid jit errors
        return self.return_dict and not self.torchscript

    @property
    def num_labels(self) -> int:
        """
        `int`: The number of labels for classification models.
        """
        return len(self.id2label)

    @num_labels.setter
    def num_labels(self, num_labels: int):
        if not hasattr(self, "id2label") or self.id2label is None or len(self.id2label) != num_labels:
            self.id2label = {i: f"LABEL_{i}" for i in range(num_labels)}
            self.label2id = dict(zip(self.id2label.values(), self.id2label.keys()))
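        # Illustrative note (not from the original source): setting `num_labels` regenerates matching
        # `id2label`/`label2id` maps only when they are missing or of a different size. For example:
        #
        #     config = PretrainedConfig()       # default initialization creates 2 labels
        #     config.num_labels = 3
        #     # config.id2label -> {0: "LABEL_0", 1: "LABEL_1", 2: "LABEL_2"}
        #     # config.label2id -> {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}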
398

Sylvain Gugger's avatar
Sylvain Gugger committed
399
    def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
Lysandre's avatar
Lysandre committed
400
        """
401
402
        Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the
        [`~PretrainedConfig.from_pretrained`] class method.
Lysandre's avatar
Lysandre committed
403
404

        Args:
405
            save_directory (`str` or `os.PathLike`):
406
                Directory where the configuration JSON file will be saved (will be created if it does not exist).
407
            push_to_hub (`bool`, *optional*, defaults to `False`):
Sylvain Gugger's avatar
Sylvain Gugger committed
408
                Whether or not to push your model to the Hugging Face model hub after saving it.
409

410
                <Tip warning={true}>
411

Sylvain Gugger's avatar
Sylvain Gugger committed
412
413
414
                Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`,
                which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing
                folder. Pass along `temp_dir=True` to use a temporary directory instead.
415
416

                </Tip>
417

Sylvain Gugger's avatar
Sylvain Gugger committed
418
            kwargs:
Sylvain Gugger's avatar
Sylvain Gugger committed
419
                Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
420
        """
421
        if os.path.isfile(save_directory):
422
            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
423
424
425
426
427

        if push_to_hub:
            commit_message = kwargs.pop("commit_message", None)
            repo = self._create_or_get_repo(save_directory, **kwargs)

428
        os.makedirs(save_directory, exist_ok=True)
429
430
431
432
433
434

        # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be
        # loaded from the Hub.
        if self._auto_class is not None:
            custom_object_save(self, save_directory, config=self)

435
436
437
        # If we save using the predefined names, we can load using `from_pretrained`
        output_config_file = os.path.join(save_directory, CONFIG_NAME)

438
        self.to_json_file(output_config_file, use_diff=True)
439
        logger.info(f"Configuration saved in {output_config_file}")
440

Sylvain Gugger's avatar
Sylvain Gugger committed
441
        if push_to_hub:
442
            url = self._push_to_hub(repo, commit_message=commit_message)
Sylvain Gugger's avatar
Sylvain Gugger committed
443
444
            logger.info(f"Configuration pushed to the hub in this commit: {url}")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
        r"""
        Instantiate a [`PretrainedConfig`] (or a derived class) from a pretrained model configuration.

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a configuration file saved using the
                  [`~PretrainedConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved configuration JSON *file*, e.g., `./my_model_directory/configuration.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the configuration files and override the cached versions
                if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete an incompletely received file. Attempts to resume the download if such a file
                exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            use_auth_token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `transformers-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.

                If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `use_auth_token=True` is required when you want to use a private model.

        </Tip>

        Returns:
            [`PretrainedConfig`]: The configuration object instantiated from this pretrained model.

        Examples:

        ```python
        # We can't instantiate directly the base class *PretrainedConfig* so let's show the examples on a
        # derived class: BertConfig
        config = BertConfig.from_pretrained(
            "bert-base-uncased"
        )  # Download configuration from huggingface.co and cache.
        config = BertConfig.from_pretrained(
            "./test/saved_model/"
        )  # E.g. config (or model) was saved using *save_pretrained('./test/saved_model/')*
        config = BertConfig.from_pretrained("./test/saved_model/my_configuration.json")
        config = BertConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
        assert config.output_attentions == True
        config, unused_kwargs = BertConfig.from_pretrained(
            "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
        )
        assert config.output_attentions == True
        assert unused_kwargs == {"foo": False}
        ```"""
        config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
        if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
            logger.warning(
                f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
                f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
            )

        return cls.from_dict(config_dict, **kwargs)

    @classmethod
    def get_config_dict(
        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        [`PretrainedConfig`] using `from_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.

        Returns:
            `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the configuration object.

        """
        original_kwargs = copy.deepcopy(kwargs)
        # Get config dict associated with the base config file
        config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)

        # That config file may point us toward another config file to use.
        if "configuration_files" in config_dict:
            configuration_file = get_configuration_file(config_dict["configuration_files"])
            config_dict, kwargs = cls._get_config_dict(
                pretrained_model_name_or_path, _configuration_file=configuration_file, **original_kwargs
            )

        return config_dict, kwargs

    @classmethod
    def _get_config_dict(
        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        cache_dir = kwargs.pop("cache_dir", None)
        force_download = kwargs.pop("force_download", False)
        resume_download = kwargs.pop("resume_download", False)
        proxies = kwargs.pop("proxies", None)
        use_auth_token = kwargs.pop("use_auth_token", None)
        local_files_only = kwargs.pop("local_files_only", False)
        revision = kwargs.pop("revision", None)
        from_pipeline = kwargs.pop("_from_pipeline", None)
        from_auto_class = kwargs.pop("_from_auto", False)

        user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
        if from_pipeline is not None:
            user_agent["using_pipeline"] = from_pipeline

        if is_offline_mode() and not local_files_only:
            logger.info("Offline mode: forcing local_files_only=True")
            local_files_only = True

        pretrained_model_name_or_path = str(pretrained_model_name_or_path)
        if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
            config_file = pretrained_model_name_or_path
        else:
            configuration_file = kwargs.get("_configuration_file", CONFIG_NAME)

            if os.path.isdir(pretrained_model_name_or_path):
                config_file = os.path.join(pretrained_model_name_or_path, configuration_file)
            else:
                config_file = hf_bucket_url(
                    pretrained_model_name_or_path, filename=configuration_file, revision=revision, mirror=None
                )

        try:
            # Load from URL or cache if already cached
            resolved_config_file = cached_path(
                config_file,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                local_files_only=local_files_only,
                use_auth_token=use_auth_token,
                user_agent=user_agent,
            )

        except RepositoryNotFoundError as err:
            logger.error(err)
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
                "'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
                "permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
                "`use_auth_token=True`."
            )
        except RevisionNotFoundError as err:
            logger.error(err)
            raise EnvironmentError(
                f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
                f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
                "available revisions."
            )
        except EntryNotFoundError as err:
            logger.error(err)
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} does not appear to have a file named {configuration_file}."
            )
        except HTTPError as err:
            logger.error(err)
            raise EnvironmentError(
                "We couldn't connect to 'https://huggingface.co/' to load this model and it looks like "
                f"{pretrained_model_name_or_path} is not the path to a directory conaining a {configuration_file} "
                "file.\nCheckout your internet connection or see how to run the library in offline mode at "
                "'https://huggingface.co/docs/transformers/installation#offline-mode'."
            )
        except EnvironmentError as err:
            logger.error(err)
            raise EnvironmentError(
                f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
                "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                f"containing a {configuration_file} file"
            )

        try:
            # Load config dict
            config_dict = cls._dict_from_json_file(resolved_config_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            raise EnvironmentError(
                f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
            )

        if resolved_config_file == config_file:
            logger.info(f"loading configuration file {config_file}")
        else:
            logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}")

        return config_dict, kwargs

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "PretrainedConfig":
        """
        Instantiates a [`PretrainedConfig`] from a Python dictionary of parameters.

        Args:
            config_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the configuration object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the [`~PretrainedConfig.get_config_dict`] method.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the configuration object.

        Returns:
            [`PretrainedConfig`]: The configuration object instantiated from those parameters.
        """
        return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)

        config = cls(**config_dict)

        if hasattr(config, "pruned_heads"):
            config.pruned_heads = dict((int(key), value) for key, value in config.pruned_heads.items())

        # Update config with kwargs if needed
        to_remove = []
        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
                if key != "torch_dtype":
                    to_remove.append(key)
        for key in to_remove:
            kwargs.pop(key, None)

        logger.info(f"Model config {config}")
        if return_unused_kwargs:
            return config, kwargs
        else:
            return config

    @classmethod
    def from_json_file(cls, json_file: Union[str, os.PathLike]) -> "PretrainedConfig":
        """
        Instantiates a [`PretrainedConfig`] from the path to a JSON file of parameters.

        Args:
            json_file (`str` or `os.PathLike`):
                Path to the JSON file containing the parameters.

        Returns:
            [`PretrainedConfig`]: The configuration object instantiated from that JSON file.

        """
        config_dict = cls._dict_from_json_file(json_file)
        return cls(**config_dict)

    @classmethod
    def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
        with open(json_file, "r", encoding="utf-8") as reader:
            text = reader.read()
        return json.loads(text)

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    def __repr__(self):
        return f"{self.__class__.__name__} {self.to_json_string()}"

    def to_diff_dict(self) -> Dict[str, Any]:
        """
        Removes all attributes from config which correspond to the default config attributes for better readability and
        serializes to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        """
        config_dict = self.to_dict()

        # get the default config dict
        default_config_dict = PretrainedConfig().to_dict()

        # get class specific config dict
        class_config_dict = self.__class__().to_dict() if not self.is_composition else {}

        serializable_config_dict = {}

        # only serialize values that differ from the default config
        for key, value in config_dict.items():
            if (
                key not in default_config_dict
                or key == "transformers_version"
                or value != default_config_dict[key]
                or (key in class_config_dict and value != class_config_dict[key])
            ):
                serializable_config_dict[key] = value

        self.dict_torch_dtype_to_str(serializable_config_dict)

        return serializable_config_dict
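    # Illustrative note (not from the original source): `to_diff_dict` keeps only non-default values,
    # so a freshly created config with a single overridden attribute serializes compactly, e.g.
    #
    #     PretrainedConfig(output_attentions=True).to_diff_dict()
    #     # -> roughly {"output_attentions": True, "transformers_version": "..."}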

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes this instance to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        """
        output = copy.deepcopy(self.__dict__)
        if hasattr(self.__class__, "model_type"):
            output["model_type"] = self.__class__.model_type
        if "_auto_class" in output:
            del output["_auto_class"]

        # Transformers version when serializing the model
        output["transformers_version"] = __version__

        self.dict_torch_dtype_to_str(output)

        return output

    def to_json_string(self, use_diff: bool = True) -> str:
        """
        Serializes this instance to a JSON string.

        Args:
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
                is serialized to JSON string.

        Returns:
            `str`: String containing all the attributes that make up this configuration instance in JSON format.
        """
        if use_diff is True:
            config_dict = self.to_diff_dict()
        else:
            config_dict = self.to_dict()
        return json.dumps(config_dict, indent=2, sort_keys=True) + "\n"

    def to_json_file(self, json_file_path: Union[str, os.PathLike], use_diff: bool = True):
        """
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this configuration instance's parameters will be saved.
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
                is serialized to JSON file.
        """
        with open(json_file_path, "w", encoding="utf-8") as writer:
            writer.write(self.to_json_string(use_diff=use_diff))

    def update(self, config_dict: Dict[str, Any]):
        """
        Updates attributes of this class with attributes from `config_dict`.

        Args:
            config_dict (`Dict[str, Any]`): Dictionary of attributes that should be updated for this class.
        """
        for key, value in config_dict.items():
            setattr(self, key, value)

    def update_from_string(self, update_str: str):
        """
        Updates attributes of this class with attributes from `update_str`.

        The expected format is ints, floats and strings as is, and for booleans use `true` or `false`. For example:
        "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"

        The keys to change have to already exist in the config object.

        Args:
            update_str (`str`): String with attributes that should be updated for this class.

        """

        d = dict(x.split("=") for x in update_str.split(","))
        for k, v in d.items():
            if not hasattr(self, k):
                raise ValueError(f"key {k} isn't in the original config dict")

            old_v = getattr(self, k)
            if isinstance(old_v, bool):
                if v.lower() in ["true", "1", "y", "yes"]:
                    v = True
                elif v.lower() in ["false", "0", "n", "no"]:
                    v = False
                else:
                    raise ValueError(f"can't derive true or false from {v} (key {k})")
            elif isinstance(old_v, int):
                v = int(v)
            elif isinstance(old_v, float):
                v = float(v)
            elif not isinstance(old_v, str):
                raise ValueError(
                    f"You can only update int, float, bool or string values in the config, got {v} for key {k}"
                )

            setattr(self, k, v)
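        # Illustrative usage (not from the original source), assuming a config that already defines
        # the keys being updated:
        #
        #     config.update_from_string("n_embd=10,resid_pdrop=0.2,scale_attn_weights=false")
        #     # ints/floats are cast from the old value's type, "false" becomes the boolean False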

    def dict_torch_dtype_to_str(self, d: Dict[str, Any]) -> None:
        """
        Checks whether the passed dictionary has a *torch_dtype* key and if it's not None, converts torch.dtype to a
        string of just the type. For example, `torch.float32` gets converted into *"float32"* string, which can then be
        stored in the json format.
        """
        if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str):
            d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1]

    @classmethod
    def register_for_auto_class(cls, auto_class="AutoConfig"):
        """
        Register this class with a given auto class. This should only be used for custom configurations as the ones in
        the library are already mapped with `AutoConfig`.

        Args:
            auto_class (`str` or `type`, *optional*, defaults to `"AutoConfig"`):
                The auto class to register this new configuration with.
        """
        if not isinstance(auto_class, str):
            auto_class = auto_class.__name__

        import transformers.models.auto as auto_module

        if not hasattr(auto_module, auto_class):
            raise ValueError(f"{auto_class} is not a valid auto class.")

        cls._auto_class = auto_class

def get_configuration_file(configuration_files: List[str]) -> str:
    """
    Get the configuration file to use for this version of transformers.

    Args:
        configuration_files (`List[str]`): The list of available configuration files.

    Returns:
        `str`: The configuration file to use.
    """
    configuration_files_map = {}
    for file_name in configuration_files:
        search = _re_configuration_file.search(file_name)
        if search is not None:
            v = search.groups()[0]
            configuration_files_map[v] = file_name
    available_versions = sorted(configuration_files_map.keys())

    # Defaults to CONFIG_NAME and then try to look at some newer versions.
    configuration_file = CONFIG_NAME
    transformers_version = version.parse(__version__)
    for v in available_versions:
        if version.parse(v) <= transformers_version:
            configuration_file = configuration_files_map[v]
        else:
            # No point going further since the versions are sorted.
            break

    return configuration_file
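# Illustrative note (not from the original source): given hypothetical entries
# ["config.4.0.0.json", "config.4.2.0.json"] and an installed transformers version of 4.1.x,
# get_configuration_file picks "config.4.0.0.json", the newest file whose version does not
# exceed the running version; with no versioned match it falls back to CONFIG_NAME.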


PretrainedConfig.push_to_hub = copy_func(PretrainedConfig.push_to_hub)
PretrainedConfig.push_to_hub.__doc__ = PretrainedConfig.push_to_hub.__doc__.format(
    object="config", object_class="AutoConfig", object_files="configuration file"
)