"tests/vscode:/vscode.git/clone" did not exist on "be41eaf55feab433d5454c14127e642d85693dc4"
Unverified commit 7152ed2b, authored by Sylvain Gugger and committed by GitHub

Result of new doc style with fixes (#17015)

* Result of new doc style with fixes

* Add last two files

* Bump hf-doc-builder
Parent: 18df4407
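The pattern applied throughout this diff is the standard Python doctest convention: `>>>` opens a statement and `...` continues it, so the closing line of a multi-line call must carry the `...` prompt rather than a fresh `>>>`. A minimal, self-contained sketch follows (the `example` function is illustrative, not part of this commit), showing that code in the corrected style runs cleanly under the standard-library `doctest` module:

```python
import doctest


def example():
    """Doctest written in the corrected style.

    The closing parenthesis of a multi-line call is a continuation line,
    so it takes the `...` prompt. In the pre-fix `>>> )` form, doctest
    would parse that line as a new, standalone statement, and the example
    would fail with a SyntaxError.

    >>> total = sum(
    ...     [1, 2, 3]
    ... )  # Batch size 1
    >>> total
    6
    """


# Prints nothing when every example in the docstring passes.
doctest.run_docstring_examples(example, globals(), verbose=False)
```

This is why the continuation-prompt fix matters beyond cosmetics: docstring examples of this kind are typically executed as doctests, and a stray `>>>` on a closing parenthesis breaks execution even though the rendered documentation looks identical.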
@@ -1068,7 +1068,7 @@ class TapasForMaskedLM(TapasPreTrainedModel):
 ... )
 >>> labels = tokenizer(
 ... table=table, queries="How many movies has George Clooney played in?", return_tensors="pt"
->>> )["input_ids"]
+... )["input_ids"]
 >>> outputs = model(**inputs, labels=labels)
 >>> logits = outputs.logits
@@ -1095,7 +1095,7 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
 ... )
 >>> labels = tokenizer(
 ... table=table, queries="How many movies has George Clooney played in?", return_tensors="tf"
->>> )["input_ids"]
+... )["input_ids"]
 >>> outputs = model(**inputs, labels=labels)
 >>> logits = outputs.logits
@@ -326,7 +326,7 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
 >>> output_ids = model.generate(
 ... pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True
->>> ).sequences
+... ).sequences
 >>> preds = decoder_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
 >>> preds = [pred.strip() for pred in preds]
@@ -1081,7 +1081,7 @@ FLAX_WAV2VEC2_MODEL_DOCSTRING = """
 >>> input_values = processor(
 ... ds["speech"][0], sampling_rate=16_000, return_tensors="np"
->>> ).input_values  # Batch size 1
+... ).input_values  # Batch size 1
 >>> hidden_states = model(input_values).last_hidden_state
 ```
 """
@@ -1200,7 +1200,7 @@ FLAX_WAV2VEC2_FOR_CTC_DOCSTRING = """
 >>> input_values = processor(
 ... ds["speech"][0], sampling_rate=16_000, return_tensors="np"
->>> ).input_values  # Batch size 1
+... ).input_values  # Batch size 1
 >>> logits = model(input_values).logits
 >>> predicted_ids = jnp.argmax(logits, axis=-1)
@@ -1039,7 +1039,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
 ... 0
->>> )  # Batch size 1
+... )  # Batch size 1
 >>> start_positions = torch.tensor([1])
 >>> end_positions = torch.tensor([3])
@@ -98,7 +98,7 @@ class XLMProphetNetModel(ProphetNetModel):
 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
->>> ).input_ids  # Batch size 1
+... ).input_ids  # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -124,7 +124,7 @@ class XLMProphetNetForConditionalGeneration(ProphetNetForConditionalGeneration):
 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
->>> ).input_ids  # Batch size 1
+... ).input_ids  # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -1281,17 +1281,17 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):
 >>> # We show how to setup inputs to predict a next token using a bi-directional context.
 >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True))[
 ... None, :
->>> ]  # We will predict the masked token
+... ]  # We will predict the masked token
 >>> perm_mask = np.zeros((1, input_ids.shape[1], input_ids.shape[1]))
 >>> perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
 >>> target_mapping = np.zeros(
 ... (1, 1, input_ids.shape[1])
->>> )  # Shape [1, 1, seq_length] => let's predict one token
+... )  # Shape [1, 1, seq_length] => let's predict one token
 >>> target_mapping[
 ... 0, 0, -1
->>> ] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
+... ] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
 >>> outputs = model(
 ... input_ids,
@@ -1301,7 +1301,7 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):
 >>> next_token_logits = outputs[
 ... 0
->>> ]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+... ]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
 ```"""
 transformer_outputs = self.transformer(
 input_ids=input_ids,
@@ -1400,47 +1400,47 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
 >>> # We show how to setup inputs to predict a next token using a bi-directional context.
 >>> input_ids = torch.tensor(
 ... tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)
->>> ).unsqueeze(
+... ).unsqueeze(
 ... 0
->>> )  # We will predict the masked token
+... )  # We will predict the masked token
 >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
 >>> perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
 >>> target_mapping = torch.zeros(
 ... (1, 1, input_ids.shape[1]), dtype=torch.float
->>> )  # Shape [1, 1, seq_length] => let's predict one token
+... )  # Shape [1, 1, seq_length] => let's predict one token
 >>> target_mapping[
 ... 0, 0, -1
->>> ] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
+... ] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
 >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
 >>> next_token_logits = outputs[
 ... 0
->>> ]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+... ]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
 >>> # The same way can the XLNetLMHeadModel be used to be trained by standard auto-regressive language modeling.
 >>> input_ids = torch.tensor(
 ... tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)
->>> ).unsqueeze(
+... ).unsqueeze(
 ... 0
->>> )  # We will predict the masked token
+... )  # We will predict the masked token
 >>> labels = torch.tensor(tokenizer.encode("cute", add_special_tokens=False)).unsqueeze(0)
 >>> assert labels.shape[0] == 1, "only one word will be predicted"
 >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
 >>> perm_mask[
 ... :, :, -1
->>> ] = 1.0  # Previous tokens don't see last token as is done in standard auto-regressive lm training
+... ] = 1.0  # Previous tokens don't see last token as is done in standard auto-regressive lm training
 >>> target_mapping = torch.zeros(
 ... (1, 1, input_ids.shape[1]), dtype=torch.float
->>> )  # Shape [1, 1, seq_length] => let's predict one token
+... )  # Shape [1, 1, seq_length] => let's predict one token
 >>> target_mapping[
 ... 0, 0, -1
->>> ] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
+... ] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
 >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
 >>> loss = outputs.loss
 >>> next_token_logits = (
 ... outputs.logits
->>> )  # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+... )  # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
 ```"""
 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1980,7 +1980,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
 ... 0
->>> )  # Batch size 1
+... )  # Batch size 1
 >>> start_positions = torch.tensor([1])
 >>> end_positions = torch.tensor([3])
 >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)