Unverified Commit 27b3031d authored by Sylvain Gugger, committed by GitHub

Mass conversion of documentation from rst to Markdown (#14866)

* Convert docstrings of all configurations and tokenizers

* Processors and fixes

* Last modeling files and fixes to models

* Pipeline modules

* Utils files

* Data submodule

* All the other files

* Style

* Missing examples

* Style again

* Fix copies

* Say bye bye to rst docstrings forever
parent 18587639
......@@ -977,26 +977,27 @@ class HubertModel(HubertPreTrainedModel):
Returns:
Example::
Example:
>>> from transformers import Wav2Vec2Processor, HubertModel
>>> from datasets import load_dataset
>>> import soundfile as sf
```python
>>> from transformers import Wav2Vec2Processor, HubertModel
>>> from datasets import load_dataset
>>> import soundfile as sf
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")
>>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech
... return batch
>>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech
... return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
"""
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
......
......@@ -1405,26 +1405,27 @@ class TFHubertModel(TFHubertPreTrainedModel):
Returns:
Example::
Example:
>>> from transformers import Wav2Vec2Processor, TFHubertModel
>>> from datasets import load_dataset
>>> import soundfile as sf
```python
>>> from transformers import Wav2Vec2Processor, TFHubertModel
>>> from datasets import load_dataset
>>> import soundfile as sf
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-base-960h")
>>> model = TFHubertModel.from_pretrained("facebook/hubert-base-960h")
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-base-960h")
>>> model = TFHubertModel.from_pretrained("facebook/hubert-base-960h")
>>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech
... return batch
>>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech
... return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
"""
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```"""
inputs = input_values_processing(
func=self.call,
......
......@@ -31,55 +31,53 @@ IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class IBertConfig(PretrainedConfig):
"""
This is the configuration class to store the configuration of a :class:`~transformers.IBertModel`. It is used to
This is the configuration class to store the configuration of a [`IBertModel`]. It is used to
instantiate an I-BERT model according to the specified arguments, defining the model architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (:obj:`int`, `optional`, defaults to 30522):
vocab_size (`int`, *optional*, defaults to 30522):
Vocabulary size of the I-BERT model. Defines the number of different tokens that can be represented by the
:obj:`inputs_ids` passed when calling :class:`~transformers.IBertModel`
hidden_size (:obj:`int`, `optional`, defaults to 768):
`inputs_ids` passed when calling [`IBertModel`]
hidden_size (`int`, *optional*, defaults to 768):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
num_hidden_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 12):
num_attention_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 3072):
intermediate_size (`int`, *optional*, defaults to 3072):
Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`Callable`, `optional`, defaults to :obj:`"gelu"`):
hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
`"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
max_position_embeddings (`int`, *optional*, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 2):
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.IBertModel`
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
type_vocab_size (`int`, *optional*, defaults to 2):
The vocabulary size of the `token_type_ids` passed when calling [`IBertModel`]
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers.
position_embedding_type (:obj:`str`, `optional`, defaults to :obj:`"absolute"`):
Type of position embedding. Choose one of :obj:`"absolute"`, :obj:`"relative_key"`,
:obj:`"relative_key_query"`. For positional embeddings use :obj:`"absolute"`. For more information on
:obj:`"relative_key"`, please refer to `Self-Attention with Relative Position Representations (Shaw et al.)
<https://arxiv.org/abs/1803.02155>`__. For more information on :obj:`"relative_key_query"`, please refer to
`Method 4` in `Improve Transformer Models with Better Relative Position Embeddings (Huang et al.)
<https://arxiv.org/abs/2009.13658>`__.
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
Type of position embedding. Choose one of `"absolute"`, `"relative_key"`,
`"relative_key_query"`. For positional embeddings use `"absolute"`. For more information on
`"relative_key"`, please refer to [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155). For more information on `"relative_key_query"`, please refer to
*Method 4* in [Improve Transformer Models with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
quant_mode (`bool`, *optional*, defaults to `False`):
Whether to quantize the model or not.
force_dequant (:obj:`str`, `optional`, defaults to :obj:`"none"`):
force_dequant (`str`, *optional*, defaults to `"none"`):
Force dequantize specific nonlinear layer. Dequantized layers are then executed with full precision.
:obj:`"none"`, :obj:`"gelu"`, :obj:`"softmax"`, :obj:`"layernorm"` and :obj:`"nonlinear"` are supported. As
deafult, it is set as :obj:`"none"`, which does not dequantize any layers. Please specify :obj:`"gelu"`,
:obj:`"softmax"`, or :obj:`"layernorm"` to dequantize GELU, Softmax, or LayerNorm, respectively.
:obj:`"nonlinear"` will dequantize all nonlinear layers, i.e., GELU, Softmax, and LayerNorm.
`"none"`, `"gelu"`, `"softmax"`, `"layernorm"` and `"nonlinear"` are supported. As
a default, it is set to `"none"`, which does not dequantize any layers. Please specify `"gelu"`,
`"softmax"`, or `"layernorm"` to dequantize GELU, Softmax, or LayerNorm, respectively.
`"nonlinear"` will dequantize all nonlinear layers, i.e., GELU, Softmax, and LayerNorm.
"""
model_type = "ibert"
......
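Unlike the ImageGPT and LayoutLM configurations converted later in this commit, the `IBertConfig` docstring carries no usage example. A minimal sketch in the same style (the class names come from the `transformers` API shown above; the rest of the snippet is illustrative):

```python
>>> from transformers import IBertConfig, IBertModel

>>> # Initializing an I-BERT configuration (quant_mode=True turns on integer-only inference)
>>> configuration = IBertConfig(quant_mode=True)

>>> # Initializing a model from the configuration
>>> model = IBertModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```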
......@@ -30,15 +30,15 @@ logger = logging.get_logger(__name__)
class QuantEmbedding(nn.Module):
"""
Quantized version of :obj:`torch.nn.Embedding`. Adds quantization-specific arguments on top of
:obj:`torch.nn.Embedding`.
Quantized version of `torch.nn.Embedding`. Adds quantization-specific arguments on top of
`torch.nn.Embedding`.
Args:
weight_bit (:obj:`int`, `optional`, defaults to :obj:`8`):
weight_bit (`int`, *optional*, defaults to `8`):
Bitwidth for the quantized weight.
momentum (:obj:`float`, `optional`, defaults to :obj:`0.95`):
momentum (`float`, *optional*, defaults to `0.95`):
Momentum for updating the activation quantization range.
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
quant_mode (`bool`, *optional*, defaults to `False`):
Whether or not the layer is quantized.
"""
......@@ -117,15 +117,15 @@ class QuantAct(nn.Module):
Quantizes the given activation.
Args:
activation_bit (:obj:`int`):
activation_bit (`int`):
Bitwidth for the quantized activation.
act_range_momentum (:obj:`float`, `optional`, defaults to :obj:`0.95`):
act_range_momentum (`float`, *optional*, defaults to `0.95`):
Momentum for updating the activation quantization range.
per_channel (:obj:`bool`, `optional`, defaults to :obj:`False`):
per_channel (`bool`, *optional*, defaults to `False`):
Whether or not to use channel-wise quantization.
channel_len (:obj:`int`, `optional`):
Specify the channel length when set the `per_channel` True.
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
channel_len (`int`, *optional*):
Specify the channel length when *per_channel* is set to `True`.
quant_mode (`bool`, *optional*, defaults to `False`):
Whether or not the layer is quantized.
"""
......@@ -221,16 +221,16 @@ class QuantAct(nn.Module):
class QuantLinear(nn.Module):
"""
Quantized version of :obj:`torch.nn.Linear`. Adds quantization-specific arguments on top of :obj:`torch.nn.Linear`.
Quantized version of `torch.nn.Linear`. Adds quantization-specific arguments on top of `torch.nn.Linear`.
Args:
weight_bit (:obj:`int`, `optional`, defaults to :obj:`8`):
weight_bit (`int`, *optional*, defaults to `8`):
Bitwidth for the quantized weight.
bias_bit (:obj:`int`, `optional`, defaults to :obj:`32`):
bias_bit (`int`, *optional*, defaults to `32`):
Bitwidth for the quantized bias.
per_channel (:obj:`bool`, `optional`, defaults to :obj:`False`):
per_channel (`bool`, *optional*, defaults to `False`):
Whether or not to use channel-wise quantization.
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
quant_mode (`bool`, *optional*, defaults to `False`):
Whether or not the layer is quantized.
"""
......@@ -301,12 +301,12 @@ class QuantLinear(nn.Module):
class IntGELU(nn.Module):
"""
Quantized version of :obj:`torch.nn.GELU`. Adds quantization-specific arguments on top of :obj:`torch.nn.GELU`.
Quantized version of `torch.nn.GELU`. Adds quantization-specific arguments on top of `torch.nn.GELU`.
Args:
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
quant_mode (`bool`, *optional*, defaults to `False`):
Whether or not the layer is quantized.
force_dequant (:obj:`str`, `optional`, defaults to :obj:`"none"`):
force_dequant (`str`, *optional*, defaults to `"none"`):
Force dequantize the layer if either "gelu" or "nonlinear" is given.
"""
......@@ -358,15 +358,15 @@ class IntGELU(nn.Module):
class IntSoftmax(nn.Module):
"""
Quantized version of :obj:`torch.nn.Softmax`. Adds quantization-specific arguments on top of
:obj:`torch.nn.Softmax`.
Quantized version of `torch.nn.Softmax`. Adds quantization-specific arguments on top of
`torch.nn.Softmax`.
Args:
output_bit (:obj:`int`):
output_bit (`int`):
Bitwidth for the layer output activation.
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
quant_mode (`bool`, *optional*, defaults to `False`):
Whether or not the layer is quantized.
force_dequant (:obj:`str`, `optional`, defaults to :obj:`"none"`):
force_dequant (`str`, *optional*, defaults to `"none"`):
Force dequantize the layer if either "softmax" or "nonlinear" is given.
"""
......@@ -430,15 +430,15 @@ class IntSoftmax(nn.Module):
class IntLayerNorm(nn.Module):
"""
Quantized version of :obj:`torch.nn.LayerNorm`. Adds quantization-specific arguments on top of
:obj:`torch.nn.LayerNorm`.
Quantized version of `torch.nn.LayerNorm`. Adds quantization-specific arguments on top of
`torch.nn.LayerNorm`.
Args:
output_bit (:obj:`int`, `optional`, defaults to :obj:`8`):
output_bit (`int`, *optional*, defaults to `8`):
Bitwidth for the layer output activation.
quant_mode (:obj:`bool`, `optional`, defaults to :obj:`False`):
quant_mode (`bool`, *optional*, defaults to `False`):
Whether or not the layer is quantized.
force_dequant (:obj:`str`, `optional`, defaults to :obj:`"none"`):
force_dequant (`str`, *optional*, defaults to `"none"`):
Force dequantize the layer if either "layernorm" or "nonlinear" is given.
"""
......@@ -535,17 +535,17 @@ def get_percentile_min_max(input, lower_percentile, upper_percentile, output_ten
Calculate the percentile max and min values in a given tensor
Args:
input (:obj:`torch.Tensor`):
input (`torch.Tensor`):
The target tensor to calculate percentile max and min.
lower_percentile (:obj:`float`):
lower_percentile (`float`):
If set to 0.1, the value delimiting the smallest 0.1% of values in the tensor is returned as the percentile min.
upper_percentile (:obj:`float`):
upper_percentile (`float`):
If set to 99.9, the value delimiting the largest 0.1% of values in the tensor is returned as the percentile max.
output_tensor (:obj:`bool`, `optional`, defaults to :obj:`False`):
output_tensor (`bool`, *optional*, defaults to `False`):
If True, this function returns tensors, otherwise it returns values.
Returns:
:obj:`Tuple(torch.Tensor, torch.Tensor)`: Percentile min and max value of `input`
`Tuple(torch.Tensor, torch.Tensor)`: Percentile min and max value of *input*
"""
input_length = input.shape[0]
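With `lower_percentile=0.1` and `upper_percentile=99.9`, the calibration range simply ignores the most extreme 0.1% of values on each side; roughly (a sketch, not the exact implementation in this file):

```python
import torch

x = torch.randn(10000)
percentile_min = torch.quantile(x, 0.1 / 100)    # value bounding the smallest 0.1% of entries
percentile_max = torch.quantile(x, 99.9 / 100)   # value bounding the largest 0.1% of entries
```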
......@@ -571,17 +571,17 @@ def linear_quantize(input, scale, zero_point, inplace=False):
Quantize single-precision input tensor to integers with the given scaling factor and zeropoint.
Args:
input (:obj:`torch.Tensor`):
input (`torch.Tensor`):
Single-precision input tensor to be quantized.
scale (:obj:`torch.Tensor`):
scale (`torch.Tensor`):
Scaling factor for quantization.
zero_pint (:obj:`torch.Tensor`):
zero_point (`torch.Tensor`):
Shift for quantization.
inplace (:obj:`bool`, `optional`, defaults to :obj:`False`):
inplace (`bool`, *optional*, defaults to `False`):
Whether to compute inplace or not.
Returns:
:obj:`torch.Tensor`: Linearly quantized value of `input` according to `scale` and `zero_point`.
`torch.Tensor`: Linearly quantized value of *input* according to *scale* and *zero_point*.
"""
# reshape scale and zeropoint for convolutional weights and activation
if len(input.shape) == 4:
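The step being documented is the standard affine quantization map; a hedged one-liner version (the rounding and reshaping details of the real function are only partially visible in this hunk):

```python
import torch

def linear_quantize_sketch(x, scale, zero_point):
    # scale maps floats onto the integer grid; zero_point shifts the grid
    # (it is fixed to 0 in the symmetric case used elsewhere in this file)
    return torch.round(x / scale + zero_point)
```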
......@@ -606,16 +606,16 @@ def symmetric_linear_quantization_params(num_bits, saturation_min, saturation_ma
Compute the scaling factor with the given quantization range for symmetric quantization.
Args:
saturation_min (:obj:`torch.Tensor`):
saturation_min (`torch.Tensor`):
Lower bound for quantization range.
saturation_max (:obj:`torch.Tensor`):
saturation_max (`torch.Tensor`):
Upper bound for quantization range.
per_channel (:obj:`bool`, `optional`, defaults to :obj:`False`):
per_channel (`bool`, *optional*, defaults to `False`):
Whether or not to use channel-wise quantization.
Returns:
:obj:`torch.Tensor`: Scaling factor that linearly quantizes the given range between `saturation_min` and
`saturation_max`.
`torch.Tensor`: Scaling factor that linearly quantizes the given range between *saturation_min* and
*saturation_max*.
"""
# in this part, we do not need any gradient computation,
# in order to enforce this, we put torch.no_grad()
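For symmetric quantization the scaling factor is typically the larger of |saturation_min| and |saturation_max| divided by the largest representable integer; a sketch under that assumption (not necessarily the exact per-channel handling of this function):

```python
import torch

def symmetric_scale_sketch(num_bits, saturation_min, saturation_max):
    n = 2 ** (num_bits - 1) - 1                    # e.g. 127 for 8 bits
    sat = torch.max(saturation_min.abs(), saturation_max.abs())
    return torch.clamp(sat, min=1e-8) / n          # clamp avoids a zero scale
```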
......@@ -642,18 +642,18 @@ class SymmetricQuantFunction(Function):
def forward(ctx, x, k, percentile_mode, scale):
"""
Args:
x (:obj:`torch.Tensor`):
x (`torch.Tensor`):
Floating point tensor to be quantized.
k (:obj:`int`):
k (`int`):
Quantization bitwidth.
percentile_mode (:obj:`bool`):
percentile_mode (`bool`):
Whether or not to use percentile calibration.
scale (:obj:`torch.Tensor`):
Pre-calculated scaling factor for `x`. Note that the current implementation of SymmetricQuantFunction
scale (`torch.Tensor`):
Pre-calculated scaling factor for *x*. Note that the current implementation of SymmetricQuantFunction
requires a pre-calculated scaling factor.
Returns:
:obj:`torch.Tensor`: Symmetric-quantized value of `input`.
`torch.Tensor`: Symmetric-quantized value of *input*.
"""
zero_point = torch.tensor(0.0).to(scale.device)
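Combining a pre-computed scale with the fixed zero point of 0 above, symmetric quantization to `k` bits boils down to round-then-clamp; a sketch of the math (not the autograd `Function` itself):

```python
import torch

def symmetric_quantize_sketch(x, k, scale):
    n = 2 ** (k - 1) - 1
    return torch.clamp(torch.round(x / scale), -n - 1, n)
```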
......@@ -712,7 +712,7 @@ def batch_frexp(inputs, max_bit=31):
Decompose the scaling factor into mantissa and twos exponent.
Args:
scaling_factor (:obj:`torch.Tensor`):
scaling_factor (`torch.Tensor`):
Target scaling factor to decompose.
Returns:
......@@ -746,22 +746,22 @@ class FixedPointMul(Function):
Function to perform fixed-point arithmetic that can match integer arithmetic on hardware.
Args:
pre_act (:obj:`torch.Tensor`):
pre_act (`torch.Tensor`):
Input tensor.
pre_act_scaling_factor (:obj:`torch.Tensor`):
Scaling factor of the input tensor `pre_act`.
bit_num (:obj:`int`):
pre_act_scaling_factor (`torch.Tensor`):
Scaling factor of the input tensor *pre_act*.
bit_num (`int`):
Quantization bitwidth.
z_scaling_factor (:obj:`torch.Tensor`):
z_scaling_factor (`torch.Tensor`):
Scaling factor of the output tensor.
identity (:obj:`torch.Tensor`, `optional`):
identity (`torch.Tensor`, *optional*):
Identity tensor, if exists.
identity_scaling_factor (:obj:`torch.Tensor`, `optional`):
Scaling factor of the identity tensor `identity`, if exists.
identity_scaling_factor (`torch.Tensor`, *optional*):
Scaling factor of the identity tensor *identity*, if exists.
Returns:
:obj:`torch.Tensor`: Output tensor(`pre_act` if `identity` is not given, otherwise the addition of `pre_act`
and `identity`), whose scale is rescaled to `z_scaling_factor`.
`torch.Tensor`: Output tensor (*pre_act* if *identity* is not given, otherwise the addition of *pre_act*
and *identity*), whose scale is rescaled to *z_scaling_factor*.
"""
@staticmethod
......
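What `FixedPointMul` reproduces with integer-only operations is a rescaling of an already-quantized tensor from its input scale to `z_scaling_factor`; the floating-point reference it tries to match is roughly (a sketch, ignoring the optional identity branch):

```python
import torch

def rescale_reference(pre_act_int, pre_act_scaling_factor, z_scaling_factor):
    # the quantized output the fixed-point kernel approximates with mantissa/exponent arithmetic
    return torch.round(pre_act_int * (pre_act_scaling_factor / z_scaling_factor))
```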
......@@ -29,67 +29,68 @@ IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class ImageGPTConfig(PretrainedConfig):
"""
This is the configuration class to store the configuration of a :class:`~transformers.ImageGPTModel` or a
:class:`~transformers.TFImageGPTModel`. It is used to instantiate a GPT-2 model according to the specified
This is the configuration class to store the configuration of a [`ImageGPTModel`] or a
[`TFImageGPTModel`]. It is used to instantiate a GPT-2 model according to the specified
arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar
configuration to that of the ImageGPT `small <https://huggingface.co/imagegpt>`__ architecture.
configuration to that of the ImageGPT [small](https://huggingface.co/imagegpt) architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (:obj:`int`, `optional`, defaults to 512):
vocab_size (`int`, *optional*, defaults to 512):
Vocabulary size of the GPT-2 model. Defines the number of different tokens that can be represented by the
:obj:`inputs_ids` passed when calling :class:`~transformers.ImageGPTModel` or
:class:`~transformers.TFImageGPTModel`.
n_positions (:obj:`int`, `optional`, defaults to 32*32):
`inputs_ids` passed when calling [`ImageGPTModel`] or
[`TFImageGPTModel`].
n_positions (`int`, *optional*, defaults to 32*32):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
n_embd (:obj:`int`, `optional`, defaults to 512):
n_embd (`int`, *optional*, defaults to 512):
Dimensionality of the embeddings and hidden states.
n_layer (:obj:`int`, `optional`, defaults to 24):
n_layer (`int`, *optional*, defaults to 24):
Number of hidden layers in the Transformer encoder.
n_head (:obj:`int`, `optional`, defaults to 8):
n_head (`int`, *optional*, defaults to 8):
Number of attention heads for each attention layer in the Transformer encoder.
n_inner (:obj:`int`, `optional`, defaults to None):
Dimensionality of the inner feed-forward layers. :obj:`None` will set it to 4 times n_embd
activation_function (:obj:`str`, `optional`, defaults to :obj:`"quick_gelu"`):
n_inner (`int`, *optional*, defaults to None):
Dimensionality of the inner feed-forward layers. `None` will set it to 4 times `n_embd`.
activation_function (`str`, *optional*, defaults to `"quick_gelu"`):
Activation function (can be one of the activation functions defined in src/transformers/activations.py).
Defaults to "quick_gelu".
resid_pdrop (:obj:`float`, `optional`, defaults to 0.1):
resid_pdrop (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
embd_pdrop (:obj:`int`, `optional`, defaults to 0.1):
embd_pdrop (`int`, *optional*, defaults to 0.1):
The dropout ratio for the embeddings.
attn_pdrop (:obj:`float`, `optional`, defaults to 0.1):
attn_pdrop (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention.
layer_norm_epsilon (:obj:`float`, `optional`, defaults to 1e-5):
layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
The epsilon to use in the layer normalization layers.
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
scale_attn_weights (:obj:`bool`, `optional`, defaults to :obj:`True`):
scale_attn_weights (`bool`, *optional*, defaults to `True`):
Scale attention weights by dividing by sqrt(hidden_size).
use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models).
scale_attn_by_inverse_layer_idx (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether to additionally scale attention weights by ``1 / layer_idx + 1``.
reorder_and_upcast_attn (:obj:`bool`, `optional`, defaults to :obj:`False`):
scale_attn_by_inverse_layer_idx (`bool`, *optional*, defaults to `False`):
Whether to additionally scale attention weights by `1 / layer_idx + 1`.
reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention
dot-product/softmax to float() when training with mixed precision.
Example::
Example:
>>> from transformers import ImageGPTModel, ImageGPTConfig
```python
>>> from transformers import ImageGPTModel, ImageGPTConfig
>>> # Initializing a ImageGPT configuration
>>> configuration = ImageGPTConfig()
>>> # Initializing a ImageGPT configuration
>>> configuration = ImageGPTConfig()
>>> # Initializing a model from the configuration
>>> model = ImageGPTModel(configuration)
>>> # Initializing a model from the configuration
>>> model = ImageGPTModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
"""
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "imagegpt"
keys_to_ignore_at_inference = ["past_key_values"]
......
......@@ -49,23 +49,23 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
resolution (such as 32x32 or 64x64), normalize them and finally color quantize them to obtain sequences of "pixel
values" (color clusters).
This feature extractor inherits from :class:`~transformers.FeatureExtractionMixin` which contains most of the main
This feature extractor inherits from [`FeatureExtractionMixin`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods.
Args:
clusters (:obj:`np.ndarray`):
The color clusters to use, as a :obj:`np.ndarray` of shape :obj:`(n_clusters, 3)`.
do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether to resize the input to a certain :obj:`size`.
size (:obj:`int` or :obj:`Tuple(int)`, `optional`, defaults to 32):
clusters (`np.ndarray`):
The color clusters to use, as a `np.ndarray` of shape `(n_clusters, 3)`.
do_resize (`bool`, *optional*, defaults to `True`):
Whether to resize the input to a certain `size`.
size (`int` or `Tuple(int)`, *optional*, defaults to 32):
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
integer is provided, then the input will be resized to (size, size). Only has an effect if :obj:`do_resize`
is set to :obj:`True`.
resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BILINEAR`):
An optional resampling filter. This can be one of :obj:`PIL.Image.NEAREST`, :obj:`PIL.Image.BOX`,
:obj:`PIL.Image.BILINEAR`, :obj:`PIL.Image.HAMMING`, :obj:`PIL.Image.BICUBIC` or :obj:`PIL.Image.LANCZOS`.
Only has an effect if :obj:`do_resize` is set to :obj:`True`.
do_normalize (:obj:`bool`, `optional`, defaults to :obj:`True`):
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize`
is set to `True`.
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`.
Only has an effect if `do_resize` is set to `True`.
do_normalize (`bool`, *optional*, defaults to `True`):
Whether or not to normalize the input to the range between -1 and +1.
"""
......@@ -81,14 +81,14 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
def normalize(self, image):
"""
Normalizes :obj:`image` into the range -1 to +1.
Normalizes `image` into the range -1 to +1.
Args:
image (:obj:`PIL.Image.Image` or :obj:`np.ndarray` or :obj:`torch.Tensor`):
image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
The image to normalize.
Returns:
:obj:`np.ndarray`: The normalized image.
`np.ndarray`: The normalized image.
"""
image = self.to_numpy_array(image, rescale=False, channel_first=False)
......@@ -105,27 +105,29 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
"""
Main method to prepare for the model one or several image(s).
.. warning::
<Tip warning={true}>
NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so the most efficient is to pass
PIL images.
NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so the most efficient is to pass
PIL images.
</Tip>
Args:
images (:obj:`PIL.Image.Image`, :obj:`np.ndarray`, :obj:`torch.Tensor`, :obj:`List[PIL.Image.Image]`, :obj:`List[np.ndarray]`, :obj:`List[torch.Tensor]`):
images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
number of channels, H and W are image height and width.
return_tensors (:obj:`str` or :class:`~transformers.file_utils.TensorType`, `optional`, defaults to :obj:`'np'`):
return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`):
If set, will return tensors of a particular framework. Acceptable values are:
* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
* :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects.
* :obj:`'np'`: Return NumPy :obj:`np.ndarray` objects.
* :obj:`'jax'`: Return JAX :obj:`jnp.ndarray` objects.
- `'tf'`: Return TensorFlow `tf.constant` objects.
- `'pt'`: Return PyTorch `torch.Tensor` objects.
- `'np'`: Return NumPy `np.ndarray` objects.
- `'jax'`: Return JAX `jnp.ndarray` objects.
Returns:
:class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields:
[`BatchFeature`]: A [`BatchFeature`] with the following fields:
- **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
width).
......
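A short usage sketch for the feature extractor documented above. The `openai/imagegpt-small` checkpoint name and the file name are assumptions for illustration; any ImageGPT checkpoint with stored color clusters should work the same way:

```python
>>> from transformers import ImageGPTFeatureExtractor
>>> from PIL import Image

>>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")

>>> image = Image.open("your_image.png").convert("RGB")
>>> encoding = feature_extractor(images=image, return_tensors="pt")
>>> print(encoding.keys())  # per the docstring above, this holds the color-quantized pixel values
```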
......@@ -34,61 +34,60 @@ LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class LayoutLMConfig(BertConfig):
r"""
This is the configuration class to store the configuration of a :class:`~transformers.LayoutLMModel`. It is used to
This is the configuration class to store the configuration of a [`LayoutLMModel`]. It is used to
instantiate a LayoutLM model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the LayoutLM `layoutlm-base-uncased
<https://huggingface.co/microsoft/layoutlm-base-uncased>`__ architecture.
configuration with the defaults will yield a similar configuration to that of the LayoutLM [layoutlm-base-uncased](https://huggingface.co/microsoft/layoutlm-base-uncased) architecture.
Configuration objects inherit from :class:`~transformers.BertConfig` and can be used to control the model outputs.
Read the documentation from :class:`~transformers.BertConfig` for more information.
Configuration objects inherit from [`BertConfig`] and can be used to control the model outputs.
Read the documentation from [`BertConfig`] for more information.
Args:
vocab_size (:obj:`int`, `optional`, defaults to 30522):
vocab_size (`int`, *optional*, defaults to 30522):
Vocabulary size of the LayoutLM model. Defines the number of different tokens that can be represented by the
`inputs_ids` passed to the forward method of :class:`~transformers.LayoutLMModel`.
hidden_size (:obj:`int`, `optional`, defaults to 768):
*inputs_ids* passed to the forward method of [`LayoutLMModel`].
hidden_size (`int`, *optional*, defaults to 768):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
num_hidden_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 12):
num_attention_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 3072):
intermediate_size (`int`, *optional*, defaults to 3072):
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
`"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
max_position_embeddings (`int`, *optional*, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 2):
The vocabulary size of the :obj:`token_type_ids` passed into :class:`~transformers.LayoutLMModel`.
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
type_vocab_size (`int`, *optional*, defaults to 2):
The vocabulary size of the `token_type_ids` passed into [`LayoutLMModel`].
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers.
max_2d_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
max_2d_position_embeddings (`int`, *optional*, defaults to 1024):
The maximum value that the 2D position embedding might ever be used with. Typically set this to something large
just in case (e.g., 1024).
Examples::
Examples:
>>> from transformers import LayoutLMModel, LayoutLMConfig
```python
>>> from transformers import LayoutLMModel, LayoutLMConfig
>>> # Initializing a LayoutLM configuration
>>> configuration = LayoutLMConfig()
>>> # Initializing a LayoutLM configuration
>>> configuration = LayoutLMConfig()
>>> # Initializing a model from the configuration
>>> model = LayoutLMModel(configuration)
>>> # Initializing a model from the configuration
>>> model = LayoutLMModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
"""
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "layoutlm"
def __init__(
......
......@@ -747,34 +747,35 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
r"""
Returns:
Examples::
Examples:
>>> from transformers import LayoutLMTokenizer, LayoutLMModel
>>> import torch
```python
>>> from transformers import LayoutLMTokenizer, LayoutLMModel
>>> import torch
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased')
>>> model = LayoutLMModel.from_pretrained('microsoft/layoutlm-base-uncased')
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased')
>>> model = LayoutLMModel.from_pretrained('microsoft/layoutlm-base-uncased')
>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
... word_tokens = tokenizer.tokenize(word)
... token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
... word_tokens = tokenizer.tokenize(word)
... token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> encoding = tokenizer(' '.join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
>>> last_hidden_states = outputs.last_hidden_state
"""
>>> last_hidden_states = outputs.last_hidden_state
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
......
......@@ -947,34 +947,35 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
r"""
Returns:
Examples::
Examples:
>>> from transformers import LayoutLMTokenizer, TFLayoutLMModel
>>> import tensorflow as tf
```python
>>> from transformers import LayoutLMTokenizer, TFLayoutLMModel
>>> import tensorflow as tf
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased')
>>> model = TFLayoutLMModel.from_pretrained('microsoft/layoutlm-base-uncased')
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased')
>>> model = TFLayoutLMModel.from_pretrained('microsoft/layoutlm-base-uncased')
>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
... word_tokens = tokenizer.tokenize(word)
... token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
... word_tokens = tokenizer.tokenize(word)
... token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="tf")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = tf.convert_to_tensor([token_boxes])
>>> encoding = tokenizer(' '.join(words), return_tensors="tf")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = tf.convert_to_tensor([token_boxes])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
>>> last_hidden_states = outputs.last_hidden_state
"""
>>> last_hidden_states = outputs.last_hidden_state
```"""
inputs = input_processing(
func=self.call,
config=self.config,
......
......@@ -47,10 +47,10 @@ class LayoutLMTokenizer(BertTokenizer):
r"""
Constructs a LayoutLM tokenizer.
:class:`~transformers.LayoutLMTokenizer is identical to :class:`~transformers.BertTokenizer` and runs end-to-end
[`LayoutLMTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end
tokenization: punctuation splitting + wordpiece.
Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning
Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning
parameters.
"""
......
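Since the tokenizer is identical to `BertTokenizer`, usage follows the same pattern; a minimal sketch with the `microsoft/layoutlm-base-uncased` checkpoint already used elsewhere in this diff:

```python
>>> from transformers import LayoutLMTokenizer

>>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> encoding = tokenizer("Hello world", return_tensors="pt")
>>> print(encoding.keys())  # input_ids, token_type_ids, attention_mask
```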
......@@ -52,10 +52,10 @@ class LayoutLMTokenizerFast(BertTokenizerFast):
r"""
Constructs a "Fast" LayoutLMTokenizer.
:class:`~transformers.LayoutLMTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs
[`LayoutLMTokenizerFast`] is identical to [`BertTokenizerFast`] and runs
end-to-end tokenization: punctuation splitting + wordpiece.
Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning
Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning
parameters.
"""
......
......@@ -34,87 +34,87 @@ if is_detectron2_available():
class LayoutLMv2Config(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a :class:`~transformers.LayoutLMv2Model`. It is used
This is the configuration class to store the configuration of a [`LayoutLMv2Model`]. It is used
to instantiate a LayoutLMv2 model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the LayoutLMv2
`microsoft/layoutlmv2-base-uncased <https://huggingface.co/microsoft/layoutlmv2-base-uncased>`__ architecture.
[microsoft/layoutlmv2-base-uncased](https://huggingface.co/microsoft/layoutlmv2-base-uncased) architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (:obj:`int`, `optional`, defaults to 30522):
vocab_size (`int`, *optional*, defaults to 30522):
Vocabulary size of the LayoutLMv2 model. Defines the number of different tokens that can be represented by
the :obj:`inputs_ids` passed when calling :class:`~transformers.LayoutLMv2Model` or
:class:`~transformers.TFLayoutLMv2Model`.
hidden_size (:obj:`int`, `optional`, defaults to 768):
the `inputs_ids` passed when calling [`LayoutLMv2Model`] or
[`TFLayoutLMv2Model`].
hidden_size (`int`, *optional*, defaults to 768):
Dimension of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
num_hidden_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 12):
num_attention_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 3072):
intermediate_size (`int`, *optional*, defaults to 3072):
Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"selu"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
`"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
max_position_embeddings (`int`, *optional*, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 2):
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.LayoutLMv2Model`
or :class:`~transformers.TFLayoutLMv2Model`.
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
type_vocab_size (`int`, *optional*, defaults to 2):
The vocabulary size of the `token_type_ids` passed when calling [`LayoutLMv2Model`]
or [`TFLayoutLMv2Model`].
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers.
max_2d_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
max_2d_position_embeddings (`int`, *optional*, defaults to 1024):
The maximum value that the 2D position embedding might ever be used with. Typically set this to something
large just in case (e.g., 1024).
max_rel_pos (:obj:`int`, `optional`, defaults to 128):
max_rel_pos (`int`, *optional*, defaults to 128):
The maximum number of relative positions to be used in the self-attention mechanism.
rel_pos_bins (:obj:`int`, `optional`, defaults to 32):
rel_pos_bins (`int`, *optional*, defaults to 32):
The number of relative position bins to be used in the self-attention mechanism.
fast_qkv (:obj:`bool`, `optional`, defaults to :obj:`True`):
fast_qkv (`bool`, *optional*, defaults to `True`):
Whether or not to use a single matrix for the queries, keys, values in the self-attention layers.
max_rel_2d_pos (:obj:`int`, `optional`, defaults to 256):
max_rel_2d_pos (`int`, *optional*, defaults to 256):
The maximum number of relative 2D positions in the self-attention mechanism.
rel_2d_pos_bins (:obj:`int`, `optional`, defaults to 64):
rel_2d_pos_bins (`int`, *optional*, defaults to 64):
The number of 2D relative position bins in the self-attention mechanism.
image_feature_pool_shape (:obj:`List[int]`, `optional`, defaults to [7, 7, 256]):
image_feature_pool_shape (`List[int]`, *optional*, defaults to [7, 7, 256]):
The shape of the average-pooled feature map.
coordinate_size (:obj:`int`, `optional`, defaults to 128):
coordinate_size (`int`, *optional*, defaults to 128):
Dimension of the coordinate embeddings.
shape_size (:obj:`int`, `optional`, defaults to 128):
shape_size (`int`, *optional*, defaults to 128):
Dimension of the width and height embeddings.
has_relative_attention_bias (:obj:`bool`, `optional`, defaults to :obj:`True`):
has_relative_attention_bias (`bool`, *optional*, defaults to `True`):
Whether or not to use a relative attention bias in the self-attention mechanism.
has_spatial_attention_bias (:obj:`bool`, `optional`, defaults to :obj:`True`):
has_spatial_attention_bias (`bool`, *optional*, defaults to `True`):
Whether or not to use a spatial attention bias in the self-attention mechanism.
has_visual_segment_embedding (:obj:`bool`, `optional`, defaults to :obj:`False`):
has_visual_segment_embedding (`bool`, *optional*, defaults to `False`):
Whether or not to add visual segment embeddings.
detectron2_config_args (:obj:`dict`, `optional`):
Dictionary containing the configuration arguments of the Detectron2 visual backbone. Refer to `this file
<https://github.com/microsoft/unilm/blob/master/layoutlmft/layoutlmft/models/layoutlmv2/detectron2_config.py>`__
detectron2_config_args (`dict`, *optional*):
Dictionary containing the configuration arguments of the Detectron2 visual backbone. Refer to [this file](https://github.com/microsoft/unilm/blob/master/layoutlmft/layoutlmft/models/layoutlmv2/detectron2_config.py)
for details regarding default values.
Example::
Example:
>>> from transformers import LayoutLMv2Model, LayoutLMv2Config
```python
>>> from transformers import LayoutLMv2Model, LayoutLMv2Config
>>> # Initializing a LayoutLMv2 microsoft/layoutlmv2-base-uncased style configuration
>>> configuration = LayoutLMv2Config()
>>> # Initializing a LayoutLMv2 microsoft/layoutlmv2-base-uncased style configuration
>>> configuration = LayoutLMv2Config()
>>> # Initializing a model from the microsoft/layoutlmv2-base-uncased style configuration
>>> model = LayoutLMv2Model(configuration)
>>> # Initializing a model from the microsoft/layoutlmv2-base-uncased style configuration
>>> model = LayoutLMv2Model(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
"""
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "layoutlmv2"
def __init__(
......
......@@ -85,31 +85,32 @@ class LayoutLMv2FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
Constructs a LayoutLMv2 feature extractor. This can be used to resize document images to the same size, as well as
to apply OCR on them in order to get a list of words and normalized bounding boxes.
This feature extractor inherits from :class:`~transformers.feature_extraction_utils.PreTrainedFeatureExtractor`
This feature extractor inherits from [`~feature_extraction_utils.PreTrainedFeatureExtractor`]
which contains most of the main methods. Users should refer to this superclass for more information regarding those
methods.
Args:
do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether to resize the input to a certain :obj:`size`.
size (:obj:`int` or :obj:`Tuple(int)`, `optional`, defaults to 224):
do_resize (`bool`, *optional*, defaults to `True`):
Whether to resize the input to a certain `size`.
size (`int` or `Tuple(int)`, *optional*, defaults to 224):
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
integer is provided, then the input will be resized to (size, size). Only has an effect if :obj:`do_resize`
is set to :obj:`True`.
resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BILINEAR`):
An optional resampling filter. This can be one of :obj:`PIL.Image.NEAREST`, :obj:`PIL.Image.BOX`,
:obj:`PIL.Image.BILINEAR`, :obj:`PIL.Image.HAMMING`, :obj:`PIL.Image.BICUBIC` or :obj:`PIL.Image.LANCZOS`.
Only has an effect if :obj:`do_resize` is set to :obj:`True`.
apply_ocr (:obj:`bool`, `optional`, defaults to :obj:`True`):
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize`
is set to `True`.
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`.
Only has an effect if `do_resize` is set to `True`.
apply_ocr (`bool`, *optional*, defaults to `True`):
Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes.
ocr_lang (:obj:`Optional[str]`, `optional`):
ocr_lang (`Optional[str]`, *optional*):
The language, specified by its ISO code, to be used by the Tesseract OCR engine. By default, English is
used.
.. note::
<Tip>
LayoutLMv2FeatureExtractor uses Google's Tesseract OCR engine under the hood.
"""
LayoutLMv2FeatureExtractor uses Google's Tesseract OCR engine under the hood.
</Tip>"""
model_input_names = ["pixel_values"]
......@@ -130,48 +131,49 @@ class LayoutLMv2FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
Main method to prepare for the model one or several image(s).
Args:
images (:obj:`PIL.Image.Image`, :obj:`np.ndarray`, :obj:`torch.Tensor`, :obj:`List[PIL.Image.Image]`, :obj:`List[np.ndarray]`, :obj:`List[torch.Tensor]`):
images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
number of channels, H and W are image height and width.
return_tensors (:obj:`str` or :class:`~transformers.file_utils.TensorType`, `optional`, defaults to :obj:`'np'`):
return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`):
If set, will return tensors of a particular framework. Acceptable values are:
* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
* :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects.
* :obj:`'np'`: Return NumPy :obj:`np.ndarray` objects.
* :obj:`'jax'`: Return JAX :obj:`jnp.ndarray` objects.
- `'tf'`: Return TensorFlow `tf.constant` objects.
- `'pt'`: Return PyTorch `torch.Tensor` objects.
- `'np'`: Return NumPy `np.ndarray` objects.
- `'jax'`: Return JAX `jnp.ndarray` objects.
Returns:
:class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields:
[`BatchFeature`]: A [`BatchFeature`] with the following fields:
- **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
width).
- **words** -- Optional words as identified by Tesseract OCR (only when
:class:`~transformers.LayoutLMv2FeatureExtractor` was initialized with :obj:`apply_ocr` set to ``True``).
[`LayoutLMv2FeatureExtractor`] was initialized with `apply_ocr` set to `True`).
- **boxes** -- Optional bounding boxes as identified by Tesseract OCR, normalized based on the image size
(only when :class:`~transformers.LayoutLMv2FeatureExtractor` was initialized with :obj:`apply_ocr` set to
``True``).
(only when [`LayoutLMv2FeatureExtractor`] was initialized with `apply_ocr` set to
`True`).
Examples::
Examples:
>>> from transformers import LayoutLMv2FeatureExtractor
>>> from PIL import Image
```python
>>> from transformers import LayoutLMv2FeatureExtractor
>>> from PIL import Image
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
>>> # option 1: with apply_ocr=True (default)
>>> feature_extractor = LayoutLMv2FeatureExtractor()
>>> encoding = feature_extractor(image, return_tensors="pt")
>>> print(encoding.keys())
>>> # dict_keys(['pixel_values', 'words', 'boxes'])
>>> # option 1: with apply_ocr=True (default)
>>> feature_extractor = LayoutLMv2FeatureExtractor()
>>> encoding = feature_extractor(image, return_tensors="pt")
>>> print(encoding.keys())
>>> # dict_keys(['pixel_values', 'words', 'boxes'])
>>> # option 2: with apply_ocr=False
>>> feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
>>> encoding = feature_extractor(image, return_tensors="pt")
>>> print(encoding.keys())
>>> # dict_keys(['pixel_values'])
"""
>>> # option 2: with apply_ocr=False
>>> feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
>>> encoding = feature_extractor(image, return_tensors="pt")
>>> print(encoding.keys())
>>> # dict_keys(['pixel_values'])
```"""
# Input type checking for clearer error
valid_images = False
......
......@@ -820,21 +820,22 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
r"""
Returns:
Examples::
Examples:
>>> from transformers import LayoutLMv2Processor, LayoutLMv2Model
>>> from PIL import Image
```python
>>> from transformers import LayoutLMv2Processor, LayoutLMv2Model
>>> from PIL import Image
>>> processor = LayoutLMv2Processor.from_pretrained('microsoft/layoutlmv2-base-uncased')
>>> model = LayoutLMv2Model.from_pretrained('microsoft/layoutlmv2-base-uncased')
>>> processor = LayoutLMv2Processor.from_pretrained('microsoft/layoutlmv2-base-uncased')
>>> model = LayoutLMv2Model.from_pretrained('microsoft/layoutlmv2-base-uncased')
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
>>> encoding = processor(image, return_tensors="pt")
>>> encoding = processor(image, return_tensors="pt")
>>> outputs = model(**encoding)
>>> last_hidden_states = outputs.last_hidden_state
"""
>>> outputs = model(**encoding)
>>> last_hidden_states = outputs.last_hidden_state
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
......
......@@ -29,22 +29,22 @@ class LayoutLMv2Processor:
Constructs a LayoutLMv2 processor which combines a LayoutLMv2 feature extractor and a LayoutLMv2 tokenizer into a
single processor.
:class:`~transformers.LayoutLMv2Processor` offers all the functionalities you need to prepare data for the model.
[`LayoutLMv2Processor`] offers all the functionalities you need to prepare data for the model.
It first uses :class:`~transformers.LayoutLMv2FeatureExtractor` to resize document images to a fixed size, and
It first uses [`LayoutLMv2FeatureExtractor`] to resize document images to a fixed size, and
optionally applies OCR to get words and normalized bounding boxes. These are then provided to
:class:`~transformers.LayoutLMv2Tokenizer` or :class:`~transformers.LayoutLMv2TokenizerFast`, which turns the words
and bounding boxes into token-level :obj:`input_ids`, :obj:`attention_mask`, :obj:`token_type_ids`, :obj:`bbox`.
Optionally, one can provide integer :obj:`word_labels`, which are turned into token-level :obj:`labels` for token
[`LayoutLMv2Tokenizer`] or [`LayoutLMv2TokenizerFast`], which turns the words
and bounding boxes into token-level `input_ids`, `attention_mask`, `token_type_ids`, `bbox`.
Optionally, one can provide integer `word_labels`, which are turned into token-level `labels` for token
classification tasks (such as FUNSD, CORD).
Args:
feature_extractor (:obj:`LayoutLMv2FeatureExtractor`):
An instance of :class:`~transformers.LayoutLMv2FeatureExtractor`. The feature extractor is a required
feature_extractor (`LayoutLMv2FeatureExtractor`):
An instance of [`LayoutLMv2FeatureExtractor`]. The feature extractor is a required
input.
tokenizer (:obj:`LayoutLMv2Tokenizer` or :obj:`LayoutLMv2TokenizerFast`):
An instance of :class:`~transformers.LayoutLMv2Tokenizer` or
:class:`~transformers.LayoutLMv2TokenizerFast`. The tokenizer is a required input.
tokenizer (`LayoutLMv2Tokenizer` or `LayoutLMv2TokenizerFast`):
An instance of [`LayoutLMv2Tokenizer`] or
[`LayoutLMv2TokenizerFast`]. The tokenizer is a required input.
"""
def __init__(self, feature_extractor, tokenizer):
......@@ -62,18 +62,20 @@ class LayoutLMv2Processor:
def save_pretrained(self, save_directory):
"""
Save a LayoutLMv2 feature_extractor object and LayoutLMv2 tokenizer object to the directory ``save_directory``,
so that it can be re-loaded using the :func:`~transformers.LayoutLMv2Processor.from_pretrained` class method.
Save a LayoutLMv2 feature_extractor object and LayoutLMv2 tokenizer object to the directory `save_directory`,
so that it can be re-loaded using the [`~LayoutLMv2Processor.from_pretrained`] class method.
.. note::
<Tip>
This class method is simply calling
:meth:`~transformers.feature_extraction_utils.FeatureExtractionMixin.save_pretrained` and
:meth:`~transformers.tokenization_utils_base.PreTrainedTokenizer.save_pretrained`. Please refer to the
docstrings of the methods above for more information.
This class method is simply calling
[`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
save_directory (:obj:`str` or :obj:`os.PathLike`):
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
......@@ -84,35 +86,37 @@ class LayoutLMv2Processor:
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
r"""
Instantiate a :class:`~transformers.LayoutLMv2Processor` from a pretrained LayoutLMv2 processor.
Instantiate a [`LayoutLMv2Processor`] from a pretrained LayoutLMv2 processor.
<Tip>
.. note::
This class method is simply calling LayoutLMv2FeatureExtractor's
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and
LayoutLMv2TokenizerFast's
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
docstrings of the methods above for more information.
This class method is simply calling LayoutLMv2FeatureExtractor's
:meth:`~transformers.feature_extraction_utils.FeatureExtractionMixin.from_pretrained` and
LayoutLMv2TokenizerFast's
:meth:`~transformers.tokenization_utils_base.PreTrainedTokenizer.from_pretrained`. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (:obj:`str` or :obj:`os.PathLike`):
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the `model id` of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like ``bert-base-uncased``, or
namespaced under a user or organization name, like ``dbmdz/bert-base-german-cased``.
- a path to a `directory` containing a feature extractor file saved using the
:meth:`~transformers.SequenceFeatureExtractor.save_pretrained` method, e.g.,
``./my_model_directory/``.
- a path or url to a saved feature extractor JSON `file`, e.g.,
``./my_model_directory/preprocessor_config.json``.
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g.,
`./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
use_fast (`bool`, *optional*, defaults to `True`):
Whether or not to instantiate a fast tokenizer.
**kwargs
Additional keyword arguments passed along to both :class:`~transformers.SequenceFeatureExtractor` and
:class:`~transformers.PreTrainedTokenizer`
Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
if use_fast:
......@@ -146,14 +150,12 @@ class LayoutLMv2Processor:
**kwargs
) -> BatchEncoding:
"""
This method first forwards the :obj:`images` argument to
:meth:`~transformers.LayoutLMv2FeatureExtractor.__call__`. In case :class:`~LayoutLMv2FeatureExtractor` was
initialized with :obj:`apply_ocr` set to ``True``, it passes the obtained words and bounding boxes along with
the additional arguments to :meth:`~transformers.LayoutLMv2Tokenizer.__call__` and returns the output, together
with resized :obj:`images`. In case :class:`~LayoutLMv2FeatureExtractor` was initialized with :obj:`apply_ocr`
set to ``False``, it passes the words (:obj:`text`/:obj:`text_pair`) and :obj:`boxes` specified by the user
along with the additional arguments to :meth:`~transformers.LayoutLMv2Tokenizer.__call__` and returns the
output, together with resized :obj:`images`.
This method first forwards the `images` argument to
[`~LayoutLMv2FeatureExtractor.__call__`]. In case [`LayoutLMv2FeatureExtractor`] was
initialized with `apply_ocr` set to `True`, it passes the obtained words and bounding boxes along with
the additional arguments to [`~LayoutLMv2Tokenizer.__call__`] and returns the output, together
with resized `images`. In case [`LayoutLMv2FeatureExtractor`] was initialized with `apply_ocr`
set to `False`, it passes the words (`text`/`text_pair`) and `boxes` specified by the user along with the additional arguments to [`~LayoutLMv2Tokenizer.__call__`] and returns the output, together with resized `images`.
Please refer to the docstring of the above two methods for more information.
"""
......
......@@ -59,51 +59,51 @@ PRETRAINED_INIT_CONFIGURATION = {
LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
add_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`True`):
add_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to encode the sequences with the special tokens relative to their model.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`False`):
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
Activates and controls padding. Accepts the following values:
* :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a
single sequence if provided).
* :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
- `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the
maximum acceptable input length for the model if that argument is not provided.
* :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
- `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
different lengths).
truncation (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.TruncationStrategy`, `optional`, defaults to :obj:`False`):
truncation (`bool`, `str` or [`~tokenization_utils_base.TruncationStrategy`], *optional*, defaults to `False`):
Activates and controls truncation. Accepts the following values:
* :obj:`True` or :obj:`'longest_first'`: Truncate to a maximum length specified with the argument
:obj:`max_length` or to the maximum acceptable input length for the model if that argument is not
- `True` or `'longest_first'`: Truncate to a maximum length specified with the argument
`max_length` or to the maximum acceptable input length for the model if that argument is not
provided. This will truncate token by token, removing a token from the longest sequence in the pair
if a pair of sequences (or a batch of pairs) is provided.
* :obj:`'only_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or to
- `'only_first'`: Truncate to a maximum length specified with the argument `max_length` or to
the maximum acceptable input length for the model if that argument is not provided. This will only
truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
* :obj:`'only_second'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
- `'only_second'`: Truncate to a maximum length specified with the argument `max_length` or
to the maximum acceptable input length for the model if that argument is not provided. This will only
truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
* :obj:`False` or :obj:`'do_not_truncate'` (default): No truncation (i.e., can output batch with
- `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with
sequence lengths greater than the model maximum admissible input size).
max_length (:obj:`int`, `optional`):
max_length (`int`, *optional*):
Controls the maximum length to use by one of the truncation/padding parameters. If left unset or set to
:obj:`None`, this will use the predefined model maximum length if a maximum length is required by one
`None`, this will use the predefined model maximum length if a maximum length is required by one
of the truncation/padding parameters. If the model has no specific maximum input length (like XLNet)
truncation/padding to a maximum length will be deactivated.
stride (:obj:`int`, `optional`, defaults to 0):
If set to a number along with :obj:`max_length`, the overflowing tokens returned when
:obj:`return_overflowing_tokens=True` will contain some tokens from the end of the truncated sequence
stride (`int`, *optional*, defaults to 0):
If set to a number along with `max_length`, the overflowing tokens returned when
`return_overflowing_tokens=True` will contain some tokens from the end of the truncated sequence
returned to provide some overlap between truncated and overflowing sequences. The value of this
argument defines the number of overlapping tokens.
pad_to_multiple_of (:obj:`int`, `optional`):
pad_to_multiple_of (`int`, *optional*):
If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
return_tensors (:obj:`str` or :class:`~transformers.file_utils.TensorType`, `optional`):
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
If set, will return tensors instead of list of python integers. Acceptable values are:
* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
* :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects.
* :obj:`'np'`: Return Numpy :obj:`np.ndarray` objects.
- `'tf'`: Return TensorFlow `tf.constant` objects.
- `'pt'`: Return PyTorch `torch.Tensor` objects.
- `'np'`: Return Numpy `np.ndarray` objects.
"""
......@@ -145,14 +145,14 @@ def subfinder(mylist, pattern):
class LayoutLMv2Tokenizer(PreTrainedTokenizer):
r"""
Construct a LayoutLMv2 tokenizer. Based on WordPiece. :class:`~transformers.LayoutLMv2Tokenizer` can be used to
turn words, word-level bounding boxes and optional word labels to token-level :obj:`input_ids`,
:obj:`attention_mask`, :obj:`token_type_ids`, :obj:`bbox`, and optional :obj:`labels` (for token classification).
Construct a LayoutLMv2 tokenizer. Based on WordPiece. [`LayoutLMv2Tokenizer`] can be used to
turn words, word-level bounding boxes and optional word labels to token-level `input_ids`,
`attention_mask`, `token_type_ids`, `bbox`, and optional `labels` (for token classification).
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods.
This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods.
Users should refer to this superclass for more information regarding those methods.
:class:`~transformers.LayoutLMv2Tokenizer` runs end-to-end tokenization: punctuation splitting and wordpiece. It
[`LayoutLMv2Tokenizer`] runs end-to-end tokenization: punctuation splitting and wordpiece. It
also turns the word-level bounding boxes into token-level bounding boxes.
"""
......@@ -274,17 +274,17 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. A BERT sequence has the following format:
- single sequence: ``[CLS] X [SEP]``
- pair of sequences: ``[CLS] A [SEP] B [SEP]``
- single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]`
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
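To make the resulting format concrete, a small hedged sketch (the tokens are illustrative):

```python
>>> from transformers import LayoutLMv2Tokenizer

>>> tokenizer = LayoutLMv2Tokenizer.from_pretrained("microsoft/layoutlmv2-base-uncased")

>>> ids = tokenizer.convert_tokens_to_ids(["hello", "world"])
>>> print(tokenizer.decode(tokenizer.build_inputs_with_special_tokens(ids)))
>>> # [CLS] hello world [SEP]
```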
......@@ -297,18 +297,18 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
) -> List[int]:
"""
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer ``prepare_for_model`` method.
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
......@@ -326,16 +326,16 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A BERT sequence
pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s).
"""
sep = [self.sep_token_id]
......@@ -392,16 +392,16 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
sequences with word-level normalized bounding boxes and optional labels.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence can be a string, a list of strings
(words of a single example or questions of a batch of examples) or a list of list of strings (batch of
words).
text_pair (:obj:`List[str]`, :obj:`List[List[str]]`):
text_pair (`List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence should be a list of strings
(pretokenized string).
boxes (:obj:`List[List[int]]`, :obj:`List[List[List[int]]]`):
boxes (`List[List[int]]`, `List[List[List[int]]]`):
Word-level bounding boxes. Each bounding box should be normalized to be on a 0-1000 scale.
word_labels (:obj:`List[int]`, :obj:`List[List[int]]`, `optional`):
word_labels (`List[int]`, `List[List[int]]`, *optional*):
Word-level integer labels (for token classification tasks such as FUNSD, CORD).
"""
# Input type checking for clearer error
......@@ -772,12 +772,12 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
) -> BatchEncoding:
"""
Tokenize and prepare for the model a sequence or a pair of sequences. .. warning:: This method is deprecated,
``__call__`` should be used instead.
`__call__` should be used instead.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The first sequence to be encoded. This can be a string, a list of strings or a list of list of strings.
text_pair (:obj:`List[str]` or :obj:`List[int]`, `optional`):
text_pair (`List[str]` or `List[int]`, *optional*):
Optional second sequence to be encoded. This can be a list of strings (words of a single example) or a
list of list of strings (words of a batch of examples).
"""
......@@ -893,18 +893,18 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
"""
Prepares a sequence or a pair of sequences so that it can be used by the model. It adds special tokens,
truncates sequences if overflowing while taking into account the special tokens and manages a moving window
(with user defined stride) for overflowing tokens. Please Note, for `text_pair` different than `None` and
`truncation_strategy = longest_first` or `True`, it is not possible to return overflowing tokens. Such a
(with user defined stride) for overflowing tokens. Please note that for *text_pair* different from *None* and
*truncation_strategy = longest_first* or *True*, it is not possible to return overflowing tokens. Such a
combination of arguments will raise an error.
Word-level :obj:`boxes` are turned into token-level :obj:`bbox`. If provided, word-level :obj:`word_labels` are
turned into token-level :obj:`labels`. The word label is used for the first token of the word, while remaining
Word-level `boxes` are turned into token-level `bbox`. If provided, word-level `word_labels` are
turned into token-level `labels`. The word label is used for the first token of the word, while remaining
tokens are labeled with -100, such that they will be ignored by the loss function.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The first sequence to be encoded. This can be a string, a list of strings or a list of list of strings.
text_pair (:obj:`List[str]` or :obj:`List[int]`, `optional`):
text_pair (`List[str]` or `List[int]`, *optional*):
Optional second sequence to be encoded. This can be a list of strings (words of a single example) or a
list of list of strings (words of a batch of examples).
"""
......@@ -1092,42 +1092,42 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
Truncates a sequence pair in-place following the strategy.
Args:
ids (:obj:`List[int]`):
Tokenized input ids of the first sequence. Can be obtained from a string by chaining the ``tokenize``
and ``convert_tokens_to_ids`` methods.
token_boxes (:obj:`List[List[int]]`):
ids (`List[int]`):
Tokenized input ids of the first sequence. Can be obtained from a string by chaining the `tokenize`
and `convert_tokens_to_ids` methods.
token_boxes (`List[List[int]]`):
Bounding boxes of the first sequence.
pair_ids (:obj:`List[int]`, `optional`):
Tokenized input ids of the second sequence. Can be obtained from a string by chaining the ``tokenize``
and ``convert_tokens_to_ids`` methods.
pair_token_boxes (:obj:`List[List[int]]`, `optional`):
pair_ids (`List[int]`, *optional*):
Tokenized input ids of the second sequence. Can be obtained from a string by chaining the `tokenize`
and `convert_tokens_to_ids` methods.
pair_token_boxes (`List[List[int]]`, *optional*):
Bounding boxes of the second sequence.
labels (:obj:`List[int]`, `optional`):
labels (`List[int]`, *optional*):
Labels of the first sequence (for token classification tasks).
num_tokens_to_remove (:obj:`int`, `optional`, defaults to 0):
num_tokens_to_remove (`int`, *optional*, defaults to 0):
Number of tokens to remove using the truncation strategy.
truncation_strategy (:obj:`str` or :class:`~transformers.tokenization_utils_base.TruncationStrategy`, `optional`, defaults to :obj:`False`):
truncation_strategy (`str` or [`~tokenization_utils_base.TruncationStrategy`], *optional*, defaults to `False`):
The strategy to follow for truncation. Can be:
* :obj:`'longest_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
- `'longest_first'`: Truncate to a maximum length specified with the argument `max_length` or
to the maximum acceptable input length for the model if that argument is not provided. This will
truncate token by token, removing a token from the longest sequence in the pair if a pair of
sequences (or a batch of pairs) is provided.
* :obj:`'only_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or to
- `'only_first'`: Truncate to a maximum length specified with the argument `max_length` or to
the maximum acceptable input length for the model if that argument is not provided. This will only
truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
* :obj:`'only_second'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
- `'only_second'`: Truncate to a maximum length specified with the argument `max_length` or
to the maximum acceptable input length for the model if that argument is not provided. This will only
truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
* :obj:`'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
- `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
greater than the model maximum admissible input size).
stride (:obj:`int`, `optional`, defaults to 0):
stride (`int`, *optional*, defaults to 0):
If set to a positive number, the overflowing tokens returned will contain some tokens from the main
sequence returned. The value of this argument defines the number of additional tokens.
Returns:
:obj:`Tuple[List[int], List[int], List[int]]`: The truncated ``ids``, the truncated ``pair_ids`` and the
list of overflowing tokens. Note: The `longest_first` strategy returns empty list of overflowing tokens if
`Tuple[List[int], List[int], List[int]]`: The truncated `ids`, the truncated `pair_ids` and the
list of overflowing tokens. Note: The *longest_first* strategy returns empty list of overflowing tokens if
a pair of sequences (or a batch of pairs) is provided.
"""
if num_tokens_to_remove <= 0:
......@@ -1291,19 +1291,18 @@ class BasicTokenizer(object):
Constructs a BasicTokenizer that will run basic tokenization (punctuation splitting, lower casing, etc.).
Args:
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`True`):
do_lower_case (`bool`, *optional*, defaults to `True`):
Whether or not to lowercase the input when tokenizing.
never_split (:obj:`Iterable`, `optional`):
never_split (`Iterable`, *optional*):
Collection of tokens which will never be split during tokenization. Only has an effect when
:obj:`do_basic_tokenize=True`
tokenize_chinese_chars (:obj:`bool`, `optional`, defaults to :obj:`True`):
`do_basic_tokenize=True`
tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
Whether or not to tokenize Chinese characters.
This should likely be deactivated for Japanese (see this `issue
<https://github.com/huggingface/transformers/issues/328>`__).
strip_accents: (:obj:`bool`, `optional`):
This should likely be deactivated for Japanese (see this [issue](https://github.com/huggingface/transformers/issues/328)).
strip_accents: (`bool`, *optional*):
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
value for :obj:`lowercase` (as in the original BERT).
value for `lowercase` (as in the original BERT).
"""
def __init__(self, do_lower_case=True, never_split=None, tokenize_chinese_chars=True, strip_accents=None):
......@@ -1320,9 +1319,9 @@ class BasicTokenizer(object):
WordPieceTokenizer.
Args:
**never_split**: (`optional`) list of str
never_split (`List[str]`, *optional*):
Kept for backward compatibility purposes. Now implemented directly at the base class level (see
:func:`PreTrainedTokenizer.tokenize`) List of token not to split.
[`PreTrainedTokenizer.tokenize`]) List of token not to split.
"""
# union() returns a new set by concatenating the two sets.
never_split = self.never_split.union(set(never_split)) if never_split else self.never_split
......@@ -1449,14 +1448,14 @@ class WordpieceTokenizer(object):
Tokenizes a piece of text into its word pieces. This uses a greedy longest-match-first algorithm to perform
tokenization using the given vocabulary.
For example, :obj:`input = "unaffable"` wil return as output :obj:`["un", "##aff", "##able"]`.
For example, `input = "unaffable"` wil return as output `["un", "##aff", "##able"]`.
Args:
text: A single token or whitespace separated tokens. This should have
already been passed through `BasicTokenizer`.
text: A single token or whitespace separated tokens. This should have
already been passed through *BasicTokenizer*.
Returns:
A list of wordpiece tokens.
A list of wordpiece tokens.
"""
output_tokens = []
......
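A hedged sketch of the greedy longest-match-first behaviour with a toy vocabulary (the vocabulary is illustrative and the import path mirrors this module; real vocabularies are much larger):

```python
>>> from transformers.models.layoutlmv2.tokenization_layoutlmv2 import WordpieceTokenizer

>>> vocab = {"un": 0, "##aff": 1, "##able": 2, "[UNK]": 3}  # toy vocabulary
>>> wordpiece = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")
>>> print(wordpiece.tokenize("unaffable"))
>>> # ['un', '##aff', '##able']
```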
......@@ -61,48 +61,48 @@ PRETRAINED_INIT_CONFIGURATION = {
class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
r"""
Construct a "fast" LayoutLMv2 tokenizer (backed by HuggingFace's `tokenizers` library). Based on WordPiece.
Construct a "fast" LayoutLMv2 tokenizer (backed by HuggingFace's *tokenizers* library). Based on WordPiece.
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods.
Args:
vocab_file (:obj:`str`):
vocab_file (`str`):
File containing the vocabulary.
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`True`):
do_lower_case (`bool`, *optional*, defaults to `True`):
Whether or not to lowercase the input when tokenizing.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"[UNK]"`):
unk_token (`str`, *optional*, defaults to `"[UNK]"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"[SEP]"`):
sep_token (`str`, *optional*, defaults to `"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"[PAD]"`):
pad_token (`str`, *optional*, defaults to `"[PAD]"`):
The token used for padding, for example when batching sequences of different lengths.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"[CLS]"`):
cls_token (`str`, *optional*, defaults to `"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"[MASK]"`):
mask_token (`str`, *optional*, defaults to `"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
cls_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[0, 0, 0, 0]`):
cls_token_box (`List[int]`, *optional*, defaults to `[0, 0, 0, 0]`):
The bounding box to use for the special [CLS] token.
sep_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[1000, 1000, 1000, 1000]`):
sep_token_box (`List[int]`, *optional*, defaults to `[1000, 1000, 1000, 1000]`):
The bounding box to use for the special [SEP] token.
pad_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[0, 0, 0, 0]`):
pad_token_box (`List[int]`, *optional*, defaults to `[0, 0, 0, 0]`):
The bounding box to use for the special [PAD] token.
pad_token_label (:obj:`int`, `optional`, defaults to -100):
The label to use for padding tokens. Defaults to -100, which is the :obj:`ignore_index` of PyTorch's
pad_token_label (`int`, *optional*, defaults to -100):
The label to use for padding tokens. Defaults to -100, which is the `ignore_index` of PyTorch's
CrossEntropyLoss.
only_label_first_subword (:obj:`bool`, `optional`, defaults to :obj:`True`):
only_label_first_subword (`bool`, *optional*, defaults to `True`):
Whether or not to only label the first subword, in case word labels are provided.
tokenize_chinese_chars (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see `this
issue <https://github.com/huggingface/transformers/issues/328>`__).
strip_accents: (:obj:`bool`, `optional`):
tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see [this
issue](https://github.com/huggingface/transformers/issues/328)).
strip_accents: (`bool`, *optional*):
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
value for :obj:`lowercase` (as in the original LayoutLMv2).
value for `lowercase` (as in the original LayoutLMv2).
"""
vocab_files_names = VOCAB_FILES_NAMES
......@@ -196,16 +196,16 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
sequences with word-level normalized bounding boxes and optional labels.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence can be a string, a list of strings
(words of a single example or questions of a batch of examples) or a list of list of strings (batch of
words).
text_pair (:obj:`List[str]`, :obj:`List[List[str]]`):
text_pair (`List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence should be a list of strings
(pretokenized string).
boxes (:obj:`List[List[int]]`, :obj:`List[List[List[int]]]`):
boxes (`List[List[int]]`, `List[List[List[int]]]`):
Word-level bounding boxes. Each bounding box should be normalized to be on a 0-1000 scale.
word_labels (:obj:`List[int]`, :obj:`List[List[int]]`, `optional`):
word_labels (`List[int]`, `List[List[int]]`, *optional*):
Word-level integer labels (for token classification tasks such as FUNSD, CORD).
"""
# Input type checking for clearer error
......@@ -407,12 +407,12 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
) -> BatchEncoding:
"""
Tokenize and prepare for the model a sequence or a pair of sequences. .. warning:: This method is deprecated,
``__call__`` should be used instead.
`__call__` should be used instead.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The first sequence to be encoded. This can be a string, a list of strings or a list of list of strings.
text_pair (:obj:`List[str]` or :obj:`List[int]`, `optional`):
text_pair (`List[str]` or `List[int]`, *optional*):
Optional second sequence to be encoded. This can be a list of strings (words of a single example) or a
list of list of strings (words of a batch of examples).
"""
......@@ -760,17 +760,17 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. A BERT sequence has the following format:
- single sequence: ``[CLS] X [SEP]``
- pair of sequences: ``[CLS] A [SEP] B [SEP]``
- single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]`
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
......@@ -785,16 +785,16 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A BERT sequence
pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s).
"""
sep = [self.sep_token_id]
......
......@@ -30,21 +30,21 @@ class LayoutXLMProcessor:
Constructs a LayoutXLM processor which combines a LayoutXLM feature extractor and a LayoutXLM tokenizer into a
single processor.
:class:`~transformers.LayoutXLMProcessor` offers all the functionalities you need to prepare data for the model.
[`LayoutXLMProcessor`] offers all the functionalities you need to prepare data for the model.
It first uses :class:`~transformers.LayoutLMv2FeatureExtractor` to resize document images to a fixed size, and
It first uses [`LayoutLMv2FeatureExtractor`] to resize document images to a fixed size, and
optionally applies OCR to get words and normalized bounding boxes. These are then provided to
:class:`~transformers.LayoutXLMTokenizer` or :class:`~transformers.LayoutXLMTokenizerFast`, which turns the words
and bounding boxes into token-level :obj:`input_ids`, :obj:`attention_mask`, :obj:`token_type_ids`, :obj:`bbox`.
Optionally, one can provide integer :obj:`word_labels`, which are turned into token-level :obj:`labels` for token
[`LayoutXLMTokenizer`] or [`LayoutXLMTokenizerFast`], which turns the words
and bounding boxes into token-level `input_ids`, `attention_mask`, `token_type_ids`, `bbox`.
Optionally, one can provide integer `word_labels`, which are turned into token-level `labels` for token
classification tasks (such as FUNSD, CORD).
Args:
feature_extractor (:obj:`LayoutLMv2FeatureExtractor`):
An instance of :class:`~transformers.LayoutLMv2FeatureExtractor`. The feature extractor is a required
feature_extractor (`LayoutLMv2FeatureExtractor`):
An instance of [`LayoutLMv2FeatureExtractor`]. The feature extractor is a required
input.
tokenizer (:obj:`LayoutXLMTokenizer` or :obj:`LayoutXLMTokenizerFast`):
An instance of :class:`~transformers.LayoutXLMTokenizer` or :class:`~transformers.LayoutXLMTokenizerFast`.
tokenizer (`LayoutXLMTokenizer` or `LayoutXLMTokenizerFast`):
An instance of [`LayoutXLMTokenizer`] or [`LayoutXLMTokenizerFast`].
The tokenizer is a required input.
"""
......@@ -63,18 +63,20 @@ class LayoutXLMProcessor:
def save_pretrained(self, save_directory):
"""
Save a LayoutXLM feature_extractor object and LayoutXLM tokenizer object to the directory ``save_directory``,
so that it can be re-loaded using the :func:`~transformers.LayoutXLMProcessor.from_pretrained` class method.
Save a LayoutXLM feature_extractor object and LayoutXLM tokenizer object to the directory `save_directory`,
so that it can be re-loaded using the [`~LayoutXLMProcessor.from_pretrained`] class method.
.. note::
<Tip>
This class method is simply calling
:meth:`~transformers.feature_extraction_utils.FeatureExtractionMixin.save_pretrained` and
:meth:`~transformers.tokenization_utils_base.PreTrainedTokenizer.save_pretrained`. Please refer to the
docstrings of the methods above for more information.
This class method is simply calling
[`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
save_directory (:obj:`str` or :obj:`os.PathLike`):
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
......@@ -85,34 +87,36 @@ class LayoutXLMProcessor:
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
r"""
Instantiate a :class:`~transformers.LayoutXLMProcessor` from a pretrained LayoutXLM processor.
Instantiate a [`LayoutXLMProcessor`] from a pretrained LayoutXLM processor.
<Tip>
.. note::
This class method is simply calling LayoutLMv2FeatureExtractor's
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and
LayoutXLMTokenizerFast's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
Please refer to the docstrings of the methods above for more information.
This class method is simply calling Layoutv2FeatureExtractor's
:meth:`~transformers.feature_extraction_utils.FeatureExtractionMixin.from_pretrained` and
LayoutXLMTokenizerFast's :meth:`~transformers.tokenization_utils_base.PreTrainedTokenizer.from_pretrained`.
Please refer to the docstrings of the methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (:obj:`str` or :obj:`os.PathLike`):
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the `model id` of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like ``bert-base-uncased``, or
namespaced under a user or organization name, like ``dbmdz/bert-base-german-cased``.
- a path to a `directory` containing a feature extractor file saved using the
:meth:`~transformers.SequenceFeatureExtractor.save_pretrained` method, e.g.,
``./my_model_directory/``.
- a path or url to a saved feature extractor JSON `file`, e.g.,
``./my_model_directory/preprocessor_config.json``.
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g.,
`./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
use_fast (`bool`, *optional*, defaults to `True`):
Whether or not to instantiate a fast tokenizer.
**kwargs
Additional keyword arguments passed along to both :class:`~transformers.SequenceFeatureExtractor` and
:class:`~transformers.PreTrainedTokenizer`
Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
if use_fast:
......@@ -146,14 +150,12 @@ class LayoutXLMProcessor:
**kwargs
) -> BatchEncoding:
"""
This method first forwards the :obj:`images` argument to
:meth:`~transformers.LayoutLMv2FeatureExtractor.__call__`. In case :class:`~LayoutLMv2FeatureExtractor` was
initialized with :obj:`apply_ocr` set to ``True``, it passes the obtained words and bounding boxes along with
the additional arguments to :meth:`~transformers.LayoutXLMTokenizer.__call__` and returns the output, together
with resized :obj:`images`. In case :class:`~LayoutLMv2FeatureExtractor` was initialized with :obj:`apply_ocr`
set to ``False``, it passes the words (:obj:`text`/:obj:`text_pair`) and :obj:`boxes` specified by the user
along with the additional arguments to :meth:`~transformers.LayoutXLMTokenizer.__call__` and returns the
output, together with resized :obj:`images`.
This method first forwards the `images` argument to
[`~LayoutLMv2FeatureExtractor.__call__`]. In case [`LayoutLMv2FeatureExtractor`] was
initialized with `apply_ocr` set to `True`, it passes the obtained words and bounding boxes along with
the additional arguments to [`~LayoutXLMTokenizer.__call__`] and returns the output, together
with resized `images`. In case [`LayoutLMv2FeatureExtractor`] was initialized with `apply_ocr`
set to `False`, it passes the words (`text`/`text_pair`) and `boxes` specified by the user along with the additional arguments to [`~LayoutXLMTokenizer.__call__`] and returns the output, together with resized `images`.
Please refer to the docstring of the above two methods for more information.
"""
......
......@@ -47,75 +47,80 @@ logger = logging.get_logger(__name__)
class LayoutXLMTokenizer(PreTrainedTokenizer):
"""
Adapted from :class:`~transformers.RobertaTokenizer` and :class:`~transformers.XLNetTokenizer`. Based on
`SentencePiece <https://github.com/google/sentencepiece>`__.
Adapted from [`RobertaTokenizer`] and [`XLNetTokenizer`]. Based on
[SentencePiece](https://github.com/google/sentencepiece).
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods.
This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods.
Users should refer to this superclass for more information regarding those methods.
Args:
vocab_file (:obj:`str`):
vocab_file (`str`):
Path to the vocabulary file.
bos_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
bos_token (`str`, *optional*, defaults to `"<s>"`):
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
.. note::
<Tip>
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the :obj:`cls_token`.
eos_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the `cls_token`.
</Tip>
eos_token (`str`, *optional*, defaults to `"</s>"`):
The end of sequence token.
.. note::
<Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the `sep_token`.
</Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the :obj:`sep_token`.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
sep_token (`str`, *optional*, defaults to `"</s>"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
cls_token (`str`, *optional*, defaults to `"<s>"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"<unk>"`):
unk_token (`str`, *optional*, defaults to `"<unk>"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"<pad>"`):
pad_token (`str`, *optional*, defaults to `"<pad>"`):
The token used for padding, for example when batching sequences of different lengths.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"<mask>"`):
mask_token (`str`, *optional*, defaults to `"<mask>"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
cls_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[0, 0, 0, 0]`):
cls_token_box (`List[int]`, *optional*, defaults to `[0, 0, 0, 0]`):
The bounding box to use for the special [CLS] token.
sep_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[1000, 1000, 1000, 1000]`):
sep_token_box (`List[int]`, *optional*, defaults to `[1000, 1000, 1000, 1000]`):
The bounding box to use for the special [SEP] token.
pad_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[0, 0, 0, 0]`):
pad_token_box (`List[int]`, *optional*, defaults to `[0, 0, 0, 0]`):
The bounding box to use for the special [PAD] token.
pad_token_label (:obj:`int`, `optional`, defaults to -100):
The label to use for padding tokens. Defaults to -100, which is the :obj:`ignore_index` of PyTorch's
pad_token_label (`int`, *optional*, defaults to -100):
The label to use for padding tokens. Defaults to -100, which is the `ignore_index` of PyTorch's
CrossEntropyLoss.
only_label_first_subword (:obj:`bool`, `optional`, defaults to :obj:`True`):
only_label_first_subword (`bool`, *optional*, defaults to `True`):
Whether or not to only label the first subword, in case word labels are provided.
additional_special_tokens (:obj:`List[str]`, `optional`, defaults to :obj:`["<s>NOTUSED", "</s>NOTUSED"]`):
additional_special_tokens (`List[str]`, *optional*, defaults to `["<s>NOTUSED", "</s>NOTUSED"]`):
Additional special tokens used by the tokenizer.
sp_model_kwargs (:obj:`dict`, `optional`):
Will be passed to the ``SentencePieceProcessor.__init__()`` method. The `Python wrapper for SentencePiece
<https://github.com/google/sentencepiece/tree/master/python>`__ can be used, among other things, to set:
sp_model_kwargs (`dict`, *optional*):
Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things, to set:
- ``enable_sampling``: Enable subword regularization.
- ``nbest_size``: Sampling parameters for unigram. Invalid for BPE-Dropout.
- `enable_sampling`: Enable subword regularization.
- `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.
- ``nbest_size = {0,1}``: No sampling is performed.
- ``nbest_size > 1``: samples from the nbest_size results.
- ``nbest_size < 0``: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
- `nbest_size = {0,1}`: No sampling is performed.
- `nbest_size > 1`: samples from the nbest_size results.
- `nbest_size < 0`: assuming that nbest_size is infinite and samples from all hypotheses (lattice)
using forward-filtering-and-backward-sampling algorithm.
- ``alpha``: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
- `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
BPE-dropout.
Attributes:
sp_model (:obj:`SentencePieceProcessor`):
The `SentencePiece` processor that is used for every conversion (string, tokens and IDs).
sp_model (`SentencePieceProcessor`):
The *SentencePiece* processor that is used for every conversion (string, tokens and IDs).
"""
vocab_files_names = VOCAB_FILES_NAMES
......@@ -212,17 +217,17 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. An XLM-RoBERTa sequence has the following format:
- single sequence: ``<s> X </s>``
- pair of sequences: ``<s> A </s></s> B </s>``
- single sequence: `<s> X </s>`
- pair of sequences: `<s> A </s></s> B </s>`
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
if token_ids_1 is None:
......@@ -236,18 +241,18 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
) -> List[int]:
"""
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer ``prepare_for_model`` method.
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
......@@ -267,13 +272,13 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
not make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of zeros.
`List[int]`: List of zeros.
"""
......@@ -357,16 +362,16 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
sequences with word-level normalized bounding boxes and optional labels.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence can be a string, a list of strings
(words of a single example or questions of a batch of examples) or a list of list of strings (batch of
words).
text_pair (:obj:`List[str]`, :obj:`List[List[str]]`):
text_pair (`List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence should be a list of strings
(pretokenized string).
boxes (:obj:`List[List[int]]`, :obj:`List[List[List[int]]]`):
boxes (`List[List[int]]`, `List[List[List[int]]]`):
Word-level bounding boxes. Each bounding box should be normalized to be on a 0-1000 scale.
word_labels (:obj:`List[int]`, :obj:`List[List[int]]`, `optional`):
word_labels (`List[int]`, `List[List[int]]`, *optional*):
Word-level integer labels (for token classification tasks such as FUNSD, CORD).
"""
# Input type checking for clearer error
......@@ -684,14 +689,14 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
truncates sequences if overflowing while taking into account the special tokens and manages a moving window
(with user defined stride) for overflowing tokens.
Word-level :obj:`boxes` are turned into token-level :obj:`bbox`. If provided, word-level :obj:`word_labels` are
turned into token-level :obj:`labels`. The word label is used for the first token of the word, while remaining
Word-level `boxes` are turned into token-level `bbox`. If provided, word-level `word_labels` are
turned into token-level `labels`. The word label is used for the first token of the word, while remaining
tokens are labeled with -100, such that they will be ignored by the loss function.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The first sequence to be encoded. This can be a string, a list of strings or a list of list of strings.
text_pair (:obj:`List[str]` or :obj:`List[int]`, `optional`):
text_pair (`List[str]` or `List[int]`, *optional*):
Optional second sequence to be encoded. This can be a list of strings (words of a single example) or a
list of list of strings (words of a batch of examples).
"""
......@@ -868,41 +873,41 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
Truncates a sequence pair in-place following the strategy.
Args:
ids (:obj:`List[int]`):
Tokenized input ids of the first sequence. Can be obtained from a string by chaining the ``tokenize``
and ``convert_tokens_to_ids`` methods.
token_boxes (:obj:`List[List[int]]`):
ids (`List[int]`):
Tokenized input ids of the first sequence. Can be obtained from a string by chaining the `tokenize`
and `convert_tokens_to_ids` methods.
token_boxes (`List[List[int]]`):
Bounding boxes of the first sequence.
pair_ids (:obj:`List[int]`, `optional`):
Tokenized input ids of the second sequence. Can be obtained from a string by chaining the ``tokenize``
and ``convert_tokens_to_ids`` methods.
pair_token_boxes (:obj:`List[List[int]]`, `optional`):
pair_ids (`List[int]`, *optional*):
Tokenized input ids of the second sequence. Can be obtained from a string by chaining the `tokenize`
and `convert_tokens_to_ids` methods.
pair_token_boxes (`List[List[int]]`, *optional*):
Bounding boxes of the second sequence.
labels (:obj:`List[int]`, `optional`):
labels (`List[int]`, *optional*):
Labels of the first sequence (for token classification tasks).
num_tokens_to_remove (:obj:`int`, `optional`, defaults to 0):
num_tokens_to_remove (`int`, *optional*, defaults to 0):
Number of tokens to remove using the truncation strategy.
truncation_strategy (:obj:`str` or :class:`~transformers.tokenization_utils_base.TruncationStrategy`, `optional`, defaults to :obj:`False`):
truncation_strategy (`str` or [`~tokenization_utils_base.TruncationStrategy`], *optional*, defaults to `False`):
The strategy to follow for truncation. Can be:
* :obj:`'longest_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
- `'longest_first'`: Truncate to a maximum length specified with the argument `max_length` or
to the maximum acceptable input length for the model if that argument is not provided. This will
truncate token by token, removing a token from the longest sequence in the pair if a pair of
sequences (or a batch of pairs) is provided.
* :obj:`'only_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or to
- `'only_first'`: Truncate to a maximum length specified with the argument `max_length` or to
the maximum acceptable input length for the model if that argument is not provided. This will only
truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
* :obj:`'only_second'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
- `'only_second'`: Truncate to a maximum length specified with the argument `max_length` or
to the maximum acceptable input length for the model if that argument is not provided. This will only
truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
* :obj:`'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
- `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
greater than the model maximum admissible input size).
stride (:obj:`int`, `optional`, defaults to 0):
stride (`int`, *optional*, defaults to 0):
If set to a positive number, the overflowing tokens returned will contain some tokens from the main
sequence returned. The value of this argument defines the number of additional tokens.
Returns:
:obj:`Tuple[List[int], List[int], List[int]]`: The truncated ``ids``, the truncated ``pair_ids`` and the
`Tuple[List[int], List[int], List[int]]`: The truncated `ids`, the truncated `pair_ids` and the
list of overflowing tokens.
"""
if num_tokens_to_remove <= 0:
......
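As a rough illustration of the `'longest_first'` strategy described above, here is a self-contained toy sketch (not the library's implementation): tokens are dropped one at a time from whichever sequence is currently longer, and the boxes are kept aligned with the ids.

```python
def longest_first_truncate(ids, token_boxes, pair_ids, pair_token_boxes, num_tokens_to_remove):
    """Toy version of 'longest_first' truncation for a pair of sequences with boxes."""
    ids, token_boxes = list(ids), list(token_boxes)
    pair_ids, pair_token_boxes = list(pair_ids), list(pair_token_boxes)
    overflowing = []
    for _ in range(num_tokens_to_remove):
        if len(ids) >= len(pair_ids):
            overflowing.append(ids.pop())        # drop from the end of the first sequence
            token_boxes.pop()
        else:
            overflowing.append(pair_ids.pop())   # drop from the end of the second sequence
            pair_token_boxes.pop()
    return ids, token_boxes, pair_ids, pair_token_boxes, overflowing


ids, boxes, pair_ids, pair_boxes, overflow = longest_first_truncate(
    [1, 2, 3, 4, 5], [[0, 0, 10, 10]] * 5, [6, 7], [[0, 0, 10, 10]] * 2, num_tokens_to_remove=3
)
print(len(ids), len(pair_ids))  # 2 2 -> the longer sequence was truncated first
```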
......@@ -52,57 +52,62 @@ logger = logging.get_logger(__name__)
class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
"""
Construct a "fast" LayoutXLM tokenizer (backed by HuggingFace's `tokenizers` library). Adapted from
:class:`~transformers.RobertaTokenizer` and :class:`~transformers.XLNetTokenizer`. Based on `BPE
<https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=BPE#models>`__.
Construct a "fast" LayoutXLM tokenizer (backed by HuggingFace's *tokenizers* library). Adapted from
[`RobertaTokenizer`] and [`XLNetTokenizer`]. Based on [BPE](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=BPE#models).
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods.
Args:
vocab_file (:obj:`str`):
vocab_file (`str`):
Path to the vocabulary file.
bos_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
bos_token (`str`, *optional*, defaults to `"<s>"`):
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
.. note::
<Tip>
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the :obj:`cls_token`.
eos_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the `cls_token`.
</Tip>
eos_token (`str`, *optional*, defaults to `"</s>"`):
The end of sequence token.
.. note::
<Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the `sep_token`.
</Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the :obj:`sep_token`.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
sep_token (`str`, *optional*, defaults to `"</s>"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
cls_token (`str`, *optional*, defaults to `"<s>"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"<unk>"`):
unk_token (`str`, *optional*, defaults to `"<unk>"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"<pad>"`):
pad_token (`str`, *optional*, defaults to `"<pad>"`):
The token used for padding, for example when batching sequences of different lengths.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"<mask>"`):
mask_token (`str`, *optional*, defaults to `"<mask>"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
cls_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[0, 0, 0, 0]`):
cls_token_box (`List[int]`, *optional*, defaults to `[0, 0, 0, 0]`):
The bounding box to use for the special [CLS] token.
sep_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[1000, 1000, 1000, 1000]`):
sep_token_box (`List[int]`, *optional*, defaults to `[1000, 1000, 1000, 1000]`):
The bounding box to use for the special [SEP] token.
pad_token_box (:obj:`List[int]`, `optional`, defaults to :obj:`[0, 0, 0, 0]`):
pad_token_box (`List[int]`, *optional*, defaults to `[0, 0, 0, 0]`):
The bounding box to use for the special [PAD] token.
pad_token_label (:obj:`int`, `optional`, defaults to -100):
The label to use for padding tokens. Defaults to -100, which is the :obj:`ignore_index` of PyTorch's
pad_token_label (`int`, *optional*, defaults to -100):
The label to use for padding tokens. Defaults to -100, which is the `ignore_index` of PyTorch's
CrossEntropyLoss.
only_label_first_subword (:obj:`bool`, `optional`, defaults to :obj:`True`):
only_label_first_subword (`bool`, *optional*, defaults to `True`):
Whether or not to only label the first subword, in case word labels are provided.
additional_special_tokens (:obj:`List[str]`, `optional`, defaults to :obj:`["<s>NOTUSED", "</s>NOTUSED"]`):
additional_special_tokens (`List[str]`, *optional*, defaults to `["<s>NOTUSED", "</s>NOTUSED"]`):
Additional special tokens used by the tokenizer.
"""
......@@ -189,16 +194,16 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
sequences with word-level normalized bounding boxes and optional labels.
Args:
text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
text (`str`, `List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence can be a string, a list of strings
(words of a single example or questions of a batch of examples) or a list of list of strings (batch of
words).
text_pair (:obj:`List[str]`, :obj:`List[List[str]]`):
text_pair (`List[str]`, `List[List[str]]`):
The sequence or batch of sequences to be encoded. Each sequence should be a list of strings
(pretokenized string).
boxes (:obj:`List[List[int]]`, :obj:`List[List[List[int]]]`):
boxes (`List[List[int]]`, `List[List[List[int]]]`):
Word-level bounding boxes. Each bounding box should be normalized to be on a 0-1000 scale.
word_labels (:obj:`List[int]`, :obj:`List[List[int]]`, `optional`):
word_labels (`List[int]`, `List[List[int]]`, *optional*):
Word-level integer labels (for token classification tasks such as FUNSD, CORD).
"""
# Input type checking for clearer error
......@@ -630,17 +635,17 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. An XLM-RoBERTa sequence has the following format:
- single sequence: ``<s> X </s>``
- pair of sequences: ``<s> A </s></s> B </s>``
- single sequence: `<s> X </s>`
- pair of sequences: `<s> A </s></s> B </s>`
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
if token_ids_1 is None:
......@@ -657,13 +662,13 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
not make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (:obj:`List[int]`):
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of zeros.
`List[int]`: List of zeros.
"""
......