# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Configuration base class and utilities."""


import copy
import json
import os
import re
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

from packaging import version
from requests import HTTPError

from . import __version__
from .dynamic_module_utils import custom_object_save
from .file_utils import (
    CONFIG_NAME,
    EntryNotFoundError,
    PushToHubMixin,
    RepositoryNotFoundError,
    RevisionNotFoundError,
    cached_path,
    copy_func,
    hf_bucket_url,
    is_offline_mode,
    is_remote_url,
    is_torch_available,
)
from .utils import logging


logger = logging.get_logger(__name__)

_re_configuration_file = re.compile(r"config\.(.*)\.json")


class PretrainedConfig(PushToHubMixin):
    r"""
    Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as
    methods for loading/downloading/saving configurations.

    <Tip>

    A configuration file can be loaded and saved to disk. Loading the configuration file and using this file to
    initialize a model does **not** load the model weights. It only affects the model's configuration.

    </Tip>

    Class attributes (overridden by derived classes):

    - **model_type** (`str`) -- An identifier for the model type, serialized into the JSON file, and used to recreate
      the correct object in [`~transformers.AutoConfig`].
    - **is_composition** (`bool`) -- Whether the config class is composed of multiple sub-configs. In this case the
      config has to be initialized from two or more configs of type [`~transformers.PretrainedConfig`] like:
      [`~transformers.EncoderDecoderConfig`] or [`~RagConfig`].
    - **keys_to_ignore_at_inference** (`List[str]`) -- A list of keys to ignore by default when looking at dictionary
      outputs of the model during inference.
    - **attribute_map** (`Dict[str, str]`) -- A dict that maps model specific attribute names to the standardized
      naming of attributes.

    Common attributes (present in all subclasses):

    - **vocab_size** (`int`) -- The number of tokens in the vocabulary, which is also the first dimension of the
      embeddings matrix (this attribute may be missing for models that don't have a text modality like ViT).
    - **hidden_size** (`int`) -- The hidden size of the model.
    - **num_attention_heads** (`int`) -- The number of attention heads used in the multi-head attention layers of the
      model.
    - **num_hidden_layers** (`int`) -- The number of blocks in the model.

    Args:
        name_or_path (`str`, *optional*, defaults to `""`):
            Store the string that was passed to [`PreTrainedModel.from_pretrained`] or
            [`TFPreTrainedModel.from_pretrained`] as `pretrained_model_name_or_path` if the configuration was created
            with such a method.
        output_hidden_states (`bool`, *optional*, defaults to `False`):
            Whether or not the model should return all hidden-states.
        output_attentions (`bool`, *optional*, defaults to `False`):
            Whether or not the model should return all attentions.
        return_dict (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return a [`~transformers.file_utils.ModelOutput`] instead of a plain tuple.
        is_encoder_decoder (`bool`, *optional*, defaults to `False`):
            Whether the model is used as an encoder/decoder or not.
        is_decoder (`bool`, *optional*, defaults to `False`):
            Whether the model is used as a decoder or not (if not, the model is used as an encoder).
        cross_attention_hidden_size (`int`, *optional*):
            The hidden size of the cross-attention layer in case the model is used as a decoder in an encoder-decoder
            setting and the cross-attention hidden dimension differs from `self.config.hidden_size`.
        add_cross_attention (`bool`, *optional*, defaults to `False`):
            Whether cross-attention layers should be added to the model. Note, this option is only relevant for models
            that can be used as decoder models within the [`EncoderDecoderModel`] class, which consists of all models
            in `AUTO_MODELS_FOR_CAUSAL_LM`.
        tie_encoder_decoder (`bool`, *optional*, defaults to `False`):
            Whether all encoder weights should be tied to their equivalent decoder weights. This requires the encoder
            and decoder model to have the exact same parameter names.
        pruned_heads (`Dict[int, List[int]]`, *optional*, defaults to `{}`):
            Pruned heads of the model. The keys are the selected layer indices and the associated values, the list of
            heads to prune in said layer.

            For instance `{1: [0, 2], 2: [2, 3]}` will prune heads 0 and 2 on layer 1 and heads 2 and 3 on layer 2.
        chunk_size_feed_forward (`int`, *optional*, defaults to `0`):
            The chunk size of all feed forward layers in the residual attention blocks. A chunk size of `0` means that
            the feed forward layer is not chunked. A chunk size of n means that the feed forward layer processes `n` <
            sequence_length embeddings at a time. For more information on feed forward chunking, see [How does Feed
            Forward Chunking work?](../glossary.html#feed-forward-chunking).

        > Parameters for sequence generation

        max_length (`int`, *optional*, defaults to 20):
            Maximum length that will be used by default in the `generate` method of the model.
        min_length (`int`, *optional*, defaults to 0):
            Minimum length that will be used by default in the `generate` method of the model.
        do_sample (`bool`, *optional*, defaults to `False`):
            Flag that will be used by default in the `generate` method of the model. Whether or not to use sampling;
            use greedy decoding otherwise.
        early_stopping (`bool`, *optional*, defaults to `False`):
            Flag that will be used by default in the `generate` method of the model. Whether to stop the beam search
            when at least `num_beams` sentences are finished per batch or not.
        num_beams (`int`, *optional*, defaults to 1):
            Number of beams for beam search that will be used by default in the `generate` method of the model. 1 means
            no beam search.
        num_beam_groups (`int`, *optional*, defaults to 1):
            Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams
            that will be used by default in the `generate` method of the model. 1 means no group beam search.
        diversity_penalty (`float`, *optional*, defaults to 0.0):
            Value to control diversity for group beam search that will be used by default in the `generate` method of
            the model. 0 means no diversity penalty. The higher the penalty, the more diverse are the outputs.
        temperature (`float`, *optional*, defaults to 1):
            The value used to modulate the next token probabilities that will be used by default in the `generate`
            method of the model. Must be strictly positive.
        top_k (`int`, *optional*, defaults to 50):
            Number of highest probability vocabulary tokens to keep for top-k-filtering that will be used by default in
            the `generate` method of the model.
        top_p (`float`, *optional*, defaults to 1):
            Value that will be used by default in the `generate` method of the model for `top_p`. If set to float < 1,
            only the most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.
        repetition_penalty (`float`, *optional*, defaults to 1):
            Parameter for repetition penalty that will be used by default in the `generate` method of the model. 1.0
            means no penalty.
        length_penalty (`float`, *optional*, defaults to 1):
            Exponential penalty to the length that will be used by default in the `generate` method of the model.
        no_repeat_ngram_size (`int`, *optional*, defaults to 0):
            Value that will be used by default in the `generate` method of the model for `no_repeat_ngram_size`. If
            set to int > 0, all ngrams of that size can only occur once.
        encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
            Value that will be used by default in the `generate` method of the model for
            `encoder_no_repeat_ngram_size`. If set to int > 0, all ngrams of that size that occur in the
            `encoder_input_ids` cannot occur in the `decoder_input_ids`.
        bad_words_ids (`List[List[int]]`, *optional*):
            List of lists of token ids that are not allowed to be generated, to be used by default in the `generate`
            method of the model. In order to get the tokens of the words that should not appear in the generated text,
            use `tokenizer.encode(bad_word, add_prefix_space=True)`.
        num_return_sequences (`int`, *optional*, defaults to 1):
            Number of independently computed returned sequences for each element in the batch that will be used by
            default in the `generate` method of the model.
        output_scores (`bool`, *optional*, defaults to `False`):
            Whether the model should return the logits when used for generation.
        return_dict_in_generate (`bool`, *optional*, defaults to `False`):
            Whether the model should return a [`~transformers.file_utils.ModelOutput`] instead of a `torch.LongTensor`.
        forced_bos_token_id (`int`, *optional*):
            The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
            multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
            language token.
        forced_eos_token_id (`int`, *optional*):
            The id of the token to force as the last generated token when `max_length` is reached.
        remove_invalid_values (`bool`, *optional*):
            Whether to remove possible _nan_ and _inf_ outputs of the model to prevent the generation method from
            crashing. Note that using `remove_invalid_values` can slow down generation.

        > Parameters for fine-tuning tasks

        architectures (`List[str]`, *optional*):
            Model architectures that can be used with the model pretrained weights.
        finetuning_task (`str`, *optional*):
            Name of the task used to fine-tune the model. This can be used when converting from an original (TensorFlow
            or PyTorch) checkpoint.
        id2label (`Dict[int, str]`, *optional*):
            A map from index (for instance prediction index, or target index) to label.
        label2id (`Dict[str, int]`, *optional*): A map from label to index for the model.
        num_labels (`int`, *optional*):
            Number of labels to use in the last layer added to the model, typically for a classification task.
        task_specific_params (`Dict[str, Any]`, *optional*):
            Additional keyword arguments to store for the current task.
        problem_type (`str`, *optional*):
            Problem type for `XxxForSequenceClassification` models. Can be one of `"regression"`,
            `"single_label_classification"` or `"multi_label_classification"`.

        > Parameters linked to the tokenizer

        tokenizer_class (`str`, *optional*):
            The name of the associated tokenizer class to use (if none is set, will use the tokenizer associated to the
            model by default).
        prefix (`str`, *optional*):
            A specific prompt that should be added at the beginning of each text before calling the model.
        bos_token_id (`int`, *optional*): The id of the _beginning-of-stream_ token.
        pad_token_id (`int`, *optional*): The id of the _padding_ token.
        eos_token_id (`int`, *optional*): The id of the _end-of-stream_ token.
        decoder_start_token_id (`int`, *optional*):
            If an encoder-decoder model starts decoding with a different token than _bos_, the id of that token.
        sep_token_id (`int`, *optional*): The id of the _separation_ token.

        > PyTorch specific parameters

        torchscript (`bool`, *optional*, defaults to `False`):
            Whether or not the model should be used with Torchscript.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether the model's input and output word embeddings should be tied. Note that this is only relevant if the
            model has an output word embedding layer.
        torch_dtype (`str`, *optional*):
            The `dtype` of the weights. This attribute can be used to initialize the model to a non-default `dtype`
            (which is normally `float32`) and thus allow for optimal storage allocation. For example, if the saved
            model is `float16`, ideally we want to load it back using the minimal amount of memory needed to load
            `float16` weights. Since the config object is stored in plain text, this attribute contains just the
            floating type string without the `torch.` prefix. For example, for `torch.float16`, `torch_dtype` is the
            `"float16"` string.

            This attribute is currently not being used during model loading time, but this may change in future
            versions. We can already start preparing for that by saving the dtype with `save_pretrained`.

        > TensorFlow specific parameters

        use_bfloat16 (`bool`, *optional*, defaults to `False`):
            Whether or not the model should use BFloat16 scalars (only used by some TensorFlow models).
    """
    model_type: str = ""
    is_composition: bool = False
    attribute_map: Dict[str, str] = {}
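    # Any attribute whose name is a key of `attribute_map` is transparently redirected to the attribute it maps to,
    # both when setting and when reading values (see `__setattr__` and `__getattribute__` below).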
    _auto_class: Optional[str] = None

    def __setattr__(self, key, value):
        if key in super().__getattribute__("attribute_map"):
            key = super().__getattribute__("attribute_map")[key]
        super().__setattr__(key, value)

    def __getattribute__(self, key):
        if key != "attribute_map" and key in super().__getattribute__("attribute_map"):
            key = super().__getattribute__("attribute_map")[key]
        return super().__getattribute__(key)

    def __init__(self, **kwargs):
        # Attributes with defaults
        self.return_dict = kwargs.pop("return_dict", True)
        self.output_hidden_states = kwargs.pop("output_hidden_states", False)
        self.output_attentions = kwargs.pop("output_attentions", False)
        self.torchscript = kwargs.pop("torchscript", False)  # Only used by PyTorch models
        self.torch_dtype = kwargs.pop("torch_dtype", None)  # Only used by PyTorch models
        self.use_bfloat16 = kwargs.pop("use_bfloat16", False)
        self.pruned_heads = kwargs.pop("pruned_heads", {})
        self.tie_word_embeddings = kwargs.pop(
            "tie_word_embeddings", True
        )  # Whether input and output word embeddings should be tied for all MLM, LM and Seq2Seq models.

        # `is_decoder` is used in encoder-decoder models to differentiate the decoder from the encoder
        self.is_encoder_decoder = kwargs.pop("is_encoder_decoder", False)
        self.is_decoder = kwargs.pop("is_decoder", False)
        self.cross_attention_hidden_size = kwargs.pop("cross_attention_hidden_size", None)
        self.add_cross_attention = kwargs.pop("add_cross_attention", False)
        self.tie_encoder_decoder = kwargs.pop("tie_encoder_decoder", False)

        # Parameters for sequence generation
        self.max_length = kwargs.pop("max_length", 20)
        self.min_length = kwargs.pop("min_length", 0)
        self.do_sample = kwargs.pop("do_sample", False)
        self.early_stopping = kwargs.pop("early_stopping", False)
        self.num_beams = kwargs.pop("num_beams", 1)
        self.num_beam_groups = kwargs.pop("num_beam_groups", 1)
        self.diversity_penalty = kwargs.pop("diversity_penalty", 0.0)
        self.temperature = kwargs.pop("temperature", 1.0)
        self.top_k = kwargs.pop("top_k", 50)
        self.top_p = kwargs.pop("top_p", 1.0)
        self.repetition_penalty = kwargs.pop("repetition_penalty", 1.0)
        self.length_penalty = kwargs.pop("length_penalty", 1.0)
        self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", 0)
        self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", 0)
        self.bad_words_ids = kwargs.pop("bad_words_ids", None)
        self.num_return_sequences = kwargs.pop("num_return_sequences", 1)
        self.chunk_size_feed_forward = kwargs.pop("chunk_size_feed_forward", 0)
        self.output_scores = kwargs.pop("output_scores", False)
        self.return_dict_in_generate = kwargs.pop("return_dict_in_generate", False)
        self.forced_bos_token_id = kwargs.pop("forced_bos_token_id", None)
        self.forced_eos_token_id = kwargs.pop("forced_eos_token_id", None)
        self.remove_invalid_values = kwargs.pop("remove_invalid_values", False)

        # Fine-tuning task arguments
        self.architectures = kwargs.pop("architectures", None)
        self.finetuning_task = kwargs.pop("finetuning_task", None)
        self.id2label = kwargs.pop("id2label", None)
        self.label2id = kwargs.pop("label2id", None)
        if self.id2label is not None:
            kwargs.pop("num_labels", None)
            self.id2label = dict((int(key), value) for key, value in self.id2label.items())
            # Keys are always strings in JSON so convert ids to int here.
        else:
            self.num_labels = kwargs.pop("num_labels", 2)

        if self.torch_dtype is not None and isinstance(self.torch_dtype, str):
            # we will start using self.torch_dtype in v5, but to be consistent with
            # from_pretrained's torch_dtype arg convert it to an actual torch.dtype object
            if is_torch_available():
                import torch

                self.torch_dtype = getattr(torch, self.torch_dtype)

        # Tokenizer arguments TODO: eventually tokenizer and models should share the same config
        self.tokenizer_class = kwargs.pop("tokenizer_class", None)
        self.prefix = kwargs.pop("prefix", None)
        self.bos_token_id = kwargs.pop("bos_token_id", None)
        self.pad_token_id = kwargs.pop("pad_token_id", None)
        self.eos_token_id = kwargs.pop("eos_token_id", None)
        self.sep_token_id = kwargs.pop("sep_token_id", None)

        self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)

        # task specific arguments
        self.task_specific_params = kwargs.pop("task_specific_params", None)

        # regression / multi-label classification
        self.problem_type = kwargs.pop("problem_type", None)
        allowed_problem_types = ("regression", "single_label_classification", "multi_label_classification")
        if self.problem_type is not None and self.problem_type not in allowed_problem_types:
            raise ValueError(
                f"The config parameter `problem_type` was not understood: received {self.problem_type} "
                "but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
            )

        # TPU arguments
        if kwargs.pop("xla_device", None) is not None:
            logger.warning(
                "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can "
                "safely remove it from your `config.json` file."
            )

        # Name or path to the pretrained checkpoint
        self._name_or_path = str(kwargs.pop("name_or_path", ""))

        # Drop the transformers version info
        self.transformers_version = kwargs.pop("transformers_version", None)

        # Deal with gradient checkpointing
        if kwargs.get("gradient_checkpointing", False):
            warnings.warn(
                "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "
                "of Transformers. Use `model.gradient_checkpointing_enable()` instead, or if you are using the "
                "`Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`."
            )

        # Additional attributes without default values
        for key, value in kwargs.items():
            try:
                setattr(self, key, value)
            except AttributeError as err:
                logger.error(f"Can't set {key} with value {value} for {self}")
                raise err

    @property
    def name_or_path(self) -> str:
        return self._name_or_path

    @name_or_path.setter
    def name_or_path(self, value):
        self._name_or_path = str(value)  # Make sure that name_or_path is a string (for JSON encoding)

    @property
    def use_return_dict(self) -> bool:
        """
        `bool`: Whether or not to return [`~file_utils.ModelOutput`] instead of tuples.
        """
        # If torchscript is set, force `return_dict=False` to avoid jit errors
        return self.return_dict and not self.torchscript

    @property
    def num_labels(self) -> int:
        """
        `int`: The number of labels for classification models.
        """
        return len(self.id2label)

    @num_labels.setter
    def num_labels(self, num_labels: int):
        if not hasattr(self, "id2label") or self.id2label is None or len(self.id2label) != num_labels:
            self.id2label = {i: f"LABEL_{i}" for i in range(num_labels)}
            self.label2id = dict(zip(self.id2label.values(), self.id2label.keys()))

    def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
        """
        Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the
        [`~PretrainedConfig.from_pretrained`] class method.

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the configuration JSON file will be saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it.

                <Tip warning={true}>

                Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`,
                which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing
                folder. Pass along `temp_dir=True` to use a temporary directory instead.

                </Tip>

            kwargs:
                Additional keyword arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
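
        Example (a minimal sketch; the directory path below is just an illustration):

        ```python
        config = BertConfig.from_pretrained("bert-base-uncased")
        config.save_pretrained("./my_model_directory/")
        reloaded_config = BertConfig.from_pretrained("./my_model_directory/")
        ```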
        """
        if os.path.isfile(save_directory):
            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")

        if push_to_hub:
            commit_message = kwargs.pop("commit_message", None)
            repo = self._create_or_get_repo(save_directory, **kwargs)

        os.makedirs(save_directory, exist_ok=True)

        # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be
        # loaded from the Hub.
        if self._auto_class is not None:
            custom_object_save(self, save_directory, config=self)

        # If we save using the predefined names, we can load using `from_pretrained`
        output_config_file = os.path.join(save_directory, CONFIG_NAME)

        self.to_json_file(output_config_file, use_diff=True)
        logger.info(f"Configuration saved in {output_config_file}")

        if push_to_hub:
            url = self._push_to_hub(repo, commit_message=commit_message)
            logger.info(f"Configuration pushed to the hub in this commit: {url}")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
        r"""
        Instantiate a [`PretrainedConfig`] (or a derived class) from a pretrained model configuration.

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a configuration file saved using the
                  [`~PretrainedConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved configuration JSON *file*, e.g., `./my_model_directory/configuration.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the configuration files and override the cached
                versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Attempts to resume the download if such a file
                exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            use_auth_token (`str` or `bool`, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `transformers-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.

                If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `use_auth_token=True` is required when you want to use a private model.

        </Tip>

        Returns:
            [`PretrainedConfig`]: The configuration object instantiated from this pretrained model.

        Examples:

        ```python
        # We can't instantiate directly the base class *PretrainedConfig* so let's show the examples on a
        # derived class: BertConfig
        config = BertConfig.from_pretrained(
            "bert-base-uncased"
        )  # Download configuration from huggingface.co and cache.
        config = BertConfig.from_pretrained(
            "./test/saved_model/"
        )  # E.g. config (or model) was saved using *save_pretrained('./test/saved_model/')*
        config = BertConfig.from_pretrained("./test/saved_model/my_configuration.json")
        config = BertConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
        assert config.output_attentions == True
        config, unused_kwargs = BertConfig.from_pretrained(
            "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
        )
        assert config.output_attentions == True
        assert unused_kwargs == {"foo": False}
        ```"""
        config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
        if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
            logger.warning(
                f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
                f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
            )

        return cls.from_dict(config_dict, **kwargs)

    @classmethod
    def get_config_dict(
        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        [`PretrainedConfig`] using `from_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.

        Returns:
            `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the configuration object.
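
        Example (a hedged sketch, assuming network access to the huggingface.co Hub):

        ```python
        config_dict, unused_kwargs = BertConfig.get_config_dict("bert-base-uncased")
        assert config_dict["model_type"] == "bert"
        ```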

        """
        original_kwargs = copy.deepcopy(kwargs)
        # Get config dict associated with the base config file
        config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)

        # That config file may point us toward another config file to use.
        if "configuration_files" in config_dict:
            configuration_file = get_configuration_file(config_dict["configuration_files"])
            config_dict, kwargs = cls._get_config_dict(
                pretrained_model_name_or_path, _configuration_file=configuration_file, **original_kwargs
            )

        return config_dict, kwargs

    @classmethod
    def _get_config_dict(
        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        cache_dir = kwargs.pop("cache_dir", None)
        force_download = kwargs.pop("force_download", False)
        resume_download = kwargs.pop("resume_download", False)
        proxies = kwargs.pop("proxies", None)
        use_auth_token = kwargs.pop("use_auth_token", None)
        local_files_only = kwargs.pop("local_files_only", False)
        revision = kwargs.pop("revision", None)
        from_pipeline = kwargs.pop("_from_pipeline", None)
        from_auto_class = kwargs.pop("_from_auto", False)

        user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
        if from_pipeline is not None:
            user_agent["using_pipeline"] = from_pipeline

        if is_offline_mode() and not local_files_only:
            logger.info("Offline mode: forcing local_files_only=True")
            local_files_only = True

        pretrained_model_name_or_path = str(pretrained_model_name_or_path)
        if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
            config_file = pretrained_model_name_or_path
        else:
            configuration_file = kwargs.get("_configuration_file", CONFIG_NAME)

            if os.path.isdir(pretrained_model_name_or_path):
                config_file = os.path.join(pretrained_model_name_or_path, configuration_file)
            else:
                config_file = hf_bucket_url(
                    pretrained_model_name_or_path, filename=configuration_file, revision=revision, mirror=None
                )

        try:
            # Load from URL or cache if already cached
            resolved_config_file = cached_path(
                config_file,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                local_files_only=local_files_only,
                use_auth_token=use_auth_token,
                user_agent=user_agent,
            )

        except RepositoryNotFoundError as err:
            logger.error(err)
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
                "'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
                "permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
                "`use_auth_token=True`."
            )
        except RevisionNotFoundError as err:
            logger.error(err)
            raise EnvironmentError(
                f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
                f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
                "available revisions."
            )
        except EntryNotFoundError as err:
            logger.error(err)
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} does not appear to have a file named {configuration_file}."
            )
        except HTTPError as err:
            logger.error(err)
            raise EnvironmentError(
                "We couldn't connect to 'https://huggingface.co/' to load this model and it looks like "
                f"{pretrained_model_name_or_path} is not the path to a directory containing a {configuration_file} "
                "file.\nCheck your internet connection or see how to run the library in offline mode at "
                "'https://huggingface.co/docs/transformers/installation#offline-mode'."
            )
        except EnvironmentError as err:
            logger.error(err)
            raise EnvironmentError(
                f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
                "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                f"containing a {configuration_file} file"
            )

        try:
            # Load config dict
            config_dict = cls._dict_from_json_file(resolved_config_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            raise EnvironmentError(
                f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
            )

        if resolved_config_file == config_file:
            logger.info(f"loading configuration file {config_file}")
        else:
            logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}")

        return config_dict, kwargs

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "PretrainedConfig":
        """
        Instantiates a [`PretrainedConfig`] from a Python dictionary of parameters.

        Args:
            config_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the configuration object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the [`~PretrainedConfig.get_config_dict`] method.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the configuration object.

        Returns:
            [`PretrainedConfig`]: The configuration object instantiated from those parameters.
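
        Example (a minimal sketch using a small, hand-written dictionary):

        ```python
        config_dict = {"model_type": "bert", "hidden_size": 128, "num_attention_heads": 2}
        config = BertConfig.from_dict(config_dict)
        assert config.hidden_size == 128
        ```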
        """
        return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)

        config = cls(**config_dict)

        if hasattr(config, "pruned_heads"):
            config.pruned_heads = dict((int(key), value) for key, value in config.pruned_heads.items())

        # Update config with kwargs if needed
        to_remove = []
        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
                if key != "torch_dtype":
                    to_remove.append(key)
        for key in to_remove:
            kwargs.pop(key, None)

        logger.info(f"Model config {config}")
        if return_unused_kwargs:
            return config, kwargs
        else:
            return config

    @classmethod
    def from_json_file(cls, json_file: Union[str, os.PathLike]) -> "PretrainedConfig":
        """
        Instantiates a [`PretrainedConfig`] from the path to a JSON file of parameters.

        Args:
            json_file (`str` or `os.PathLike`):
                Path to the JSON file containing the parameters.

        Returns:
            [`PretrainedConfig`]: The configuration object instantiated from that JSON file.
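
        Example (a minimal sketch; the file path is hypothetical):

        ```python
        config = BertConfig.from_json_file("./my_model_directory/config.json")
        ```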

        """
        config_dict = cls._dict_from_json_file(json_file)
        return cls(**config_dict)

    @classmethod
    def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
        with open(json_file, "r", encoding="utf-8") as reader:
            text = reader.read()
        return json.loads(text)

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    def __repr__(self):
        return f"{self.__class__.__name__} {self.to_json_string()}"

    def to_diff_dict(self) -> Dict[str, Any]:
        """
        Removes all attributes from config which correspond to the default config attributes for better readability and
        serializes to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
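
        Example (a small sketch; `my_new_attribute` is a made-up attribute used only for illustration):

        ```python
        config = PretrainedConfig(my_new_attribute=42)
        diff = config.to_diff_dict()
        assert diff["my_new_attribute"] == 42
        assert "temperature" not in diff  # default-valued attributes are omitted
        ```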
        """
        config_dict = self.to_dict()

        # get the default config dict
        default_config_dict = PretrainedConfig().to_dict()

        # get class specific config dict
        class_config_dict = self.__class__().to_dict() if not self.is_composition else {}

        serializable_config_dict = {}

        # only serialize values that differ from the default config
        for key, value in config_dict.items():
            if (
                key not in default_config_dict
                or key == "transformers_version"
                or value != default_config_dict[key]
                or (key in class_config_dict and value != class_config_dict[key])
            ):
                serializable_config_dict[key] = value

        self.dict_torch_dtype_to_str(serializable_config_dict)

        return serializable_config_dict

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes this instance to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        """
        output = copy.deepcopy(self.__dict__)
        if hasattr(self.__class__, "model_type"):
            output["model_type"] = self.__class__.model_type
        if "_auto_class" in output:
            del output["_auto_class"]

        # Transformers version when serializing the model
        output["transformers_version"] = __version__

        self.dict_torch_dtype_to_str(output)

        return output

    def to_json_string(self, use_diff: bool = True) -> str:
        """
        Serializes this instance to a JSON string.

        Args:
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
                is serialized to JSON string.

        Returns:
            `str`: String containing all the attributes that make up this configuration instance in JSON format.
        """
        if use_diff is True:
            config_dict = self.to_diff_dict()
        else:
            config_dict = self.to_dict()
        return json.dumps(config_dict, indent=2, sort_keys=True) + "\n"

    def to_json_file(self, json_file_path: Union[str, os.PathLike], use_diff: bool = True):
        """
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this configuration instance's parameters will be saved.
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
                is serialized to JSON file.
        """
        with open(json_file_path, "w", encoding="utf-8") as writer:
            writer.write(self.to_json_string(use_diff=use_diff))

    def update(self, config_dict: Dict[str, Any]):
        """
        Updates attributes of this class with attributes from `config_dict`.

        Args:
            config_dict (`Dict[str, Any]`): Dictionary of attributes that should be updated for this class.
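
        Example (a minimal sketch):

        ```python
        config = BertConfig()
        config.update({"output_attentions": True, "num_labels": 5})
        assert config.output_attentions is True and config.num_labels == 5
        ```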
        """
        for key, value in config_dict.items():
            setattr(self, key, value)

    def update_from_string(self, update_str: str):
        """
        Updates attributes of this class with attributes from `update_str`.

        The expected format is ints, floats and strings as is, and for booleans use `true` or `false`. For example:
        "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"

        The keys to change have to already exist in the config object.

        Args:
            update_str (`str`): String with attributes that should be updated for this class.
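
        Example (a minimal sketch; the attributes shown exist on GPT-2-style configs):

        ```python
        config = GPT2Config()
        config.update_from_string("n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index")
        assert config.n_embd == 10 and config.scale_attn_weights is False
        ```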

        """

        d = dict(x.split("=") for x in update_str.split(","))
        for k, v in d.items():
            if not hasattr(self, k):
                raise ValueError(f"key {k} isn't in the original config dict")

            old_v = getattr(self, k)
            if isinstance(old_v, bool):
                if v.lower() in ["true", "1", "y", "yes"]:
                    v = True
                elif v.lower() in ["false", "0", "n", "no"]:
                    v = False
                else:
                    raise ValueError(f"can't derive true or false from {v} (key {k})")
            elif isinstance(old_v, int):
                v = int(v)
            elif isinstance(old_v, float):
                v = float(v)
            elif not isinstance(old_v, str):
                raise ValueError(
                    f"You can only update int, float, bool or string values in the config, got {v} for key {k}"
                )

            setattr(self, k, v)

    def dict_torch_dtype_to_str(self, d: Dict[str, Any]) -> None:
        """
        Checks whether the passed dictionary has a *torch_dtype* key and if it's not None, converts torch.dtype to a
        string of just the type. For example, `torch.float32` gets converted into the *"float32"* string, which can
        then be stored in the json format.
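
        Example (a small sketch, assuming PyTorch is installed):

        ```python
        import torch

        d = {"torch_dtype": torch.float16}
        PretrainedConfig().dict_torch_dtype_to_str(d)
        assert d["torch_dtype"] == "float16"
        ```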
        """
        if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str):
            d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1]

    @classmethod
    def register_for_auto_class(cls, auto_class="AutoConfig"):
        """
        Register this class with a given auto class. This should only be used for custom configurations as the ones in
        the library are already mapped with `AutoConfig`.

        Args:
            auto_class (`str` or `type`, *optional*, defaults to `"AutoConfig"`):
                The auto class to register this new configuration with.
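
        Example (a hedged sketch of a custom configuration; the class name and model type are made up):

        ```python
        class MyCustomConfig(PretrainedConfig):
            model_type = "my_custom_model"

        MyCustomConfig.register_for_auto_class()
        ```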
        """
        if not isinstance(auto_class, str):
            auto_class = auto_class.__name__

        import transformers.models.auto as auto_module

        if not hasattr(auto_module, auto_class):
            raise ValueError(f"{auto_class} is not a valid auto class.")

        cls._auto_class = auto_class


def get_configuration_file(configuration_files: List[str]) -> str:
    """
    Get the configuration file to use for this version of transformers.

    Args:
        configuration_files (`List[str]`): The list of available configuration files.

    Returns:
        `str`: The configuration file to use.
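
    Example (a small sketch; the file names are hypothetical and the result depends on the installed version):

    ```python
    get_configuration_file(["config.json", "config.4.0.0.json", "config.42.0.0.json"])
    # With any installed transformers version in [4.0.0, 42.0.0), this returns "config.4.0.0.json".
    ```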
    """
    configuration_files_map = {}
    for file_name in configuration_files:
        search = _re_configuration_file.search(file_name)
        if search is not None:
            v = search.groups()[0]
            configuration_files_map[v] = file_name
    available_versions = sorted(configuration_files_map.keys())

    # Defaults to CONFIG_NAME and then tries to look at some newer versions.
    configuration_file = CONFIG_NAME
    transformers_version = version.parse(__version__)
    for v in available_versions:
        if version.parse(v) <= transformers_version:
            configuration_file = configuration_files_map[v]
        else:
            # No point going further since the versions are sorted.
            break

    return configuration_file


PretrainedConfig.push_to_hub = copy_func(PretrainedConfig.push_to_hub)
PretrainedConfig.push_to_hub.__doc__ = PretrainedConfig.push_to_hub.__doc__.format(
    object="config", object_class="AutoConfig", object_files="configuration file"
)