[doc] consistent True/False/None default format (#14951)

* [doc] consistent True/False/None default format * Update src/transformers/models/xlnet/modeling_xlnet.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

[doc] consistent True/False/None default format (#14951)
* [doc] consistent True/False/None default format * Update src/transformers/models/xlnet/modeling_xlnet.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
133c5e40 · Stas Bekman · GitHub · b2f50025 · 133c5e40 · 133c5e40
Unverified Commit 133c5e40 authored Dec 27, 2021 by Stas Bekman Committed by GitHub Dec 27, 2021
10 changed files
--- a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
+++ b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
@@ -272,7 +272,7 @@ class VisionEncoderDecoderModel(PreTrainedModel):
                      `config` argument. This loading path is slower than converting the TensorFlow checkpoint in a
                      PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.

-            decoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to *None*):
+            decoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
                Information necessary to initiate the text decoder. Can be either:

                    - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.

--- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py
+++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py
@@ -403,7 +403,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
    ) -> FlaxPreTrainedModel:
        """
        Params:
-            vision_model_name_or_path (:obj: *str*, *optional*, defaults to *None*):
+            vision_model_name_or_path (`str`, *optional*, defaults to `None`):
                Information necessary to initiate the vision model. Can be either:

                    - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.

--- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
+++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
@@ -404,7 +404,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
    ) -> PreTrainedModel:
        """
        Params:
-            vision_model_name_or_path (:obj: *str*, *optional*, defaults to *None*):
+            vision_model_name_or_path (`str`, *optional*, defaults to `None`):
                Information necessary to initiate the vision model. Can be either:

                    - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.

--- a/src/transformers/models/xlnet/modeling_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_xlnet.py
@@ -1383,7 +1383,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, num_predict)`, *optional*):
            Labels for masked language modeling. `num_predict` corresponds to `target_mapping.shape[1]`. If
-            `target_mapping` is :obj*None*, then `num_predict` corresponds to `sequence_length`.
+            `target_mapping` is `None`, then `num_predict` corresponds to `sequence_length`.

            The labels should correspond to the masked input words that should be predicted and depends on
            `target_mapping`. Note in order to perform standard auto-regressive language modeling a *<mask>* token has

--- a/src/transformers/optimization.py
+++ b/src/transformers/optimization.py
@@ -285,7 +285,7 @@ class AdamW(Optimizer):
            Adam's epsilon for numerical stability.
        weight_decay (`float`, *optional*, defaults to 0):
            Decoupled weight decay to apply.
-        correct_bias (`bool`, *optional*, defaults to *True*):
+        correct_bias (`bool`, *optional*, defaults to `True`):
            Whether or not to correct bias in Adam (for instance, in Bert TF repository they use `False`).
    """


--- a/src/transformers/optimization_tf.py
+++ b/src/transformers/optimization_tf.py
@@ -168,7 +168,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
            The beta2 parameter in Adam, which is the exponential decay rate for the 2nd momentum estimates.
        epsilon (`float`, *optional*, defaults to 1e-7):
            The epsilon parameter in Adam, which is a small constant for numerical stability.
-        amsgrad (`bool`, *optional*, default to *False*):
+        amsgrad (`bool`, *optional*, defaults to `False`):
            Whether to apply AMSGrad variant of this algorithm or not, see [On the Convergence of Adam and
            Beyond](https://arxiv.org/abs/1904.09237).
        weight_decay_rate (`float`, *optional*, defaults to 0):

--- a/src/transformers/pipelines/audio_classification.py
+++ b/src/transformers/pipelines/audio_classification.py
@@ -104,7 +104,7 @@ class AudioClassificationPipeline(Pipeline):
                requires *ffmpeg* to be installed on the system. If *inputs* is `bytes` it is supposed to be the
                content of an audio file and is interpreted by *ffmpeg* in the same way.
            top_k (`int`, *optional*, defaults to None):
-                The number of top labels that will be returned by the pipeline. If the provided number is *None* or
+                The number of top labels that will be returned by the pipeline. If the provided number is `None` or
                higher than the number of labels available in the model configuration, it will default to the number of
                labels.


--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -2868,7 +2868,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        Prepares a sequence of input id, or a pair of sequences of inputs ids so that it can be used by the model. It
        adds special tokens, truncates sequences if overflowing while taking into account the special tokens and
        manages a moving window (with user defined stride) for overflowing tokens. Please Note, for *pair_ids*
-        different than *None* and *truncation_strategy = longest_first* or *True*, it is not possible to return
+        different than `None` and *truncation_strategy = longest_first* or `True`, it is not possible to return
        overflowing tokens. Such a combination of arguments will raise an error.

        Args:

--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1034,7 +1034,7 @@ class Trainer:
        Args:
            resume_from_checkpoint (`str` or `bool`, *optional*):
                If a `str`, local path to a saved checkpoint as saved by a previous instance of [`Trainer`]. If a
-                `bool` and equals *True*, load the last checkpoint in *args.output_dir* as saved by a previous instance
+                `bool` and equals `True`, load the last checkpoint in *args.output_dir* as saved by a previous instance
                of [`Trainer`]. If present, training will resume from the model/optimizer/scheduler states loaded here.
            trial (`optuna.Trial` or `Dict[str, Any]`, *optional*):
                The trial run or the hyperparameter dictionary for hyperparameter search.

--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -105,7 +105,7 @@ class TrainingArguments:
                - `"steps"`: Evaluation is done (and logged) every `eval_steps`.
                - `"epoch"`: Evaluation is done at the end of each epoch.

-        prediction_loss_only (`bool`, *optional*, defaults to *False*):
+        prediction_loss_only (`bool`, *optional*, defaults to `False`):
            When performing evaluation and generating predictions, only returns the loss.
        per_device_train_batch_size (`int`, *optional*, defaults to 8):
            The batch size per GPU/TPU core/CPU for training.
@@ -175,7 +175,7 @@ class TrainingArguments:
        logging_steps (`int`, *optional*, defaults to 500):
            Number of update steps between two logs if `logging_strategy="steps"`.
        logging_nan_inf_filter (`bool`, *optional*, defaults to `True`):
-            Whether to filter `nan` and `inf` losses for logging. If set to obj:*True* the loss of every step that is
+            Whether to filter `nan` and `inf` losses for logging. If set to `True` the loss of every step that is
            `nan` or `inf` is filtered and the average loss of the current logging window is taken instead.

            <Tip>