"docs/vscode:/vscode.git/clone" did not exist on "dc9147ff362e4e69829f64d28178c77cab4bef6f"
Unverified commit b5e2b183 authored by Sylvain Gugger, committed by GitHub

Doc styler examples (#14953)

* Fix bad examples

* Add black formatting to style_doc

* Use first nonempty line

* Put it at the right place

* Don't add spaces to empty lines

* Better templates

* Deal with triple quotes in docstrings

* Result of style_doc

* Enable mdx treatment and fix code examples in MDXs

* Result of doc styler on doc source files

* Last fixes

* Break copy from
Parent: e13f72fb
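The bulk of the diff below is mechanical: fenced Python examples in docstrings and `.mdx` doc sources are now run through black, and blocks tagged `no-style` are left untouched. As a rough illustration of that idea only, here is a minimal sketch (it assumes black's Python API and a 119-character line length; it is not the repository's actual `style_doc` implementation):

```python
# Minimal, hypothetical sketch: reformat every ```python block in an .mdx file with black.
# Blocks opened as ```python no-style never match the pattern, so they are left as-is.
import re

import black


def blackify_code_blocks(mdx_text: str, line_length: int = 119) -> str:
    """Return `mdx_text` with each plain ```python block reformatted by black."""
    pattern = re.compile(r"```python\n(.*?)```", re.DOTALL)
    mode = black.Mode(line_length=line_length)

    def _reformat(match):
        code = match.group(1)
        try:
            code = black.format_str(code, mode=mode)
        except Exception:
            # Doctest-style examples (>>> / ...) are not valid Python as written;
            # a real styler would strip the prompts first, this sketch simply skips them.
            pass
        return f"```python\n{code}```"

    return pattern.sub(_reformat, mdx_text)
```

Applied to a doc file, this reproduces the kind of quote and line-wrapping changes visible in the hunks that follow.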
@@ -50,11 +50,12 @@ Here is an example of model usage:
 ```python
 >>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
->>> mname = 'facebook/blenderbot-400M-distill'
+>>> mname = "facebook/blenderbot-400M-distill"
 >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname)
 >>> tokenizer = BlenderbotTokenizer.from_pretrained(mname)
 >>> UTTERANCE = "My friends are cool but they eat too many carbs."
->>> inputs = tokenizer([UTTERANCE], return_tensors='pt')
+>>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
 >>> reply_ids = model.generate(**inputs)
 >>> print(tokenizer.batch_decode(reply_ids))
 ["<s> That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?</s>"]
@@ -51,10 +51,12 @@ ByT5 works on raw UTF-8 bytes, so it can be used without a tokenizer:
 from transformers import T5ForConditionalGeneration
 import torch
-model = T5ForConditionalGeneration.from_pretrained('google/byt5-small')
+model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
 input_ids = torch.tensor([list("Life is like a box of chocolates.".encode("utf-8"))]) + 3 # add 3 for special tokens
-labels = torch.tensor([list("La vie est comme une boîte de chocolat.".encode("utf-8"))]) + 3 # add 3 for special tokens
+labels = (
+    torch.tensor([list("La vie est comme une boîte de chocolat.".encode("utf-8"))]) + 3
+) # add 3 for special tokens
 loss = model(input_ids, labels=labels).loss # forward pass
 ```
@@ -64,11 +66,15 @@ For batched inference and training it is however recommended to make use of the
 ```python
 from transformers import T5ForConditionalGeneration, AutoTokenizer
-model = T5ForConditionalGeneration.from_pretrained('google/byt5-small')
-tokenizer = AutoTokenizer.from_pretrained('google/byt5-small')
+model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
+tokenizer = AutoTokenizer.from_pretrained("google/byt5-small")
-model_inputs = tokenizer(["Life is like a box of chocolates.", "Today is Monday."], padding="longest", return_tensors="pt")
-labels = tokenizer(["La vie est comme une boîte de chocolat.", "Aujourd'hui c'est lundi."], padding="longest", return_tensors="pt").input_ids
+model_inputs = tokenizer(
+    ["Life is like a box of chocolates.", "Today is Monday."], padding="longest", return_tensors="pt"
+)
+labels = tokenizer(
+    ["La vie est comme une boîte de chocolat.", "Aujourd'hui c'est lundi."], padding="longest", return_tensors="pt"
+).input_ids
 loss = model(**model_inputs, labels=labels).loss # forward pass
 ```
@@ -64,7 +64,7 @@ CANINE works on raw characters, so it can be used without a tokenizer:
 >>> from transformers import CanineModel
 >>> import torch
->>> model = CanineModel.from_pretrained('google/canine-c') # model pre-trained with autoregressive character loss
+>>> model = CanineModel.from_pretrained("google/canine-c") # model pre-trained with autoregressive character loss
 >>> text = "hello world"
 >>> # use Python's built-in ord() function to turn each character into its unicode code point id
@@ -81,8 +81,8 @@ sequences to the same length):
 ```python
 >>> from transformers import CanineTokenizer, CanineModel
->>> model = CanineModel.from_pretrained('google/canine-c')
->>> tokenizer = CanineTokenizer.from_pretrained('google/canine-c')
+>>> model = CanineModel.from_pretrained("google/canine-c")
+>>> tokenizer = CanineTokenizer.from_pretrained("google/canine-c")
 >>> inputs = ["Life is like a box of chocolates.", "You never know what you gonna get."]
 >>> encoding = tokenizer(inputs, padding="longest", truncation=True, return_tensors="pt")
@@ -29,16 +29,24 @@ The `generate()` method can be used to generate text using GPT Neo model.
 ```python
 >>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
 >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
 >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
->>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
-... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
-... "researchers was the fact that the unicorns spoke perfect English."
+>>> prompt = (
+... "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
+... "previously unexplored valley, in the Andes Mountains. Even more surprising to the "
+... "researchers was the fact that the unicorns spoke perfect English."
+... )
 >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids
->>> gen_tokens = model.generate(input_ids, do_sample=True, temperature=0.9, max_length=100,)
+>>> gen_tokens = model.generate(
+... input_ids,
+... do_sample=True,
+... temperature=0.9,
+... max_length=100,
+... )
 >>> gen_text = tokenizer.batch_decode(gen_tokens)[0]
 ```
@@ -33,7 +33,9 @@ Tips:
 >>> from transformers import GPTJForCausalLM
 >>> import torch
->>> model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16, low_cpu_mem_usage=True)
+>>> model = GPTJForCausalLM.from_pretrained(
+... "EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16, low_cpu_mem_usage=True
+... )
 ```
 - The model should fit on 16GB GPU for inference. For training/fine-tuning it would take much more GPU RAM. Adam
@@ -56,16 +58,24 @@ model.
 ```python
 >>> from transformers import AutoModelForCausalLM, AutoTokenizer
 >>> model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
 >>> tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
->>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
-... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
-... "researchers was the fact that the unicorns spoke perfect English."
+>>> prompt = (
+... "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
+... "previously unexplored valley, in the Andes Mountains. Even more surprising to the "
+... "researchers was the fact that the unicorns spoke perfect English."
+... )
 >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids
->>> gen_tokens = model.generate(input_ids, do_sample=True, temperature=0.9, max_length=100,)
+>>> gen_tokens = model.generate(
+... input_ids,
+... do_sample=True,
+... temperature=0.9,
+... max_length=100,
+... )
 >>> gen_text = tokenizer.batch_decode(gen_tokens)[0]
 ```
@@ -78,13 +88,20 @@ model.
 >>> model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", torch_dtype=torch.float16)
 >>> tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
->>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
-... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
-... "researchers was the fact that the unicorns spoke perfect English."
+>>> prompt = (
+... "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
+... "previously unexplored valley, in the Andes Mountains. Even more surprising to the "
+... "researchers was the fact that the unicorns spoke perfect English."
+... )
 >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids
->>> gen_tokens = model.generate(input_ids, do_sample=True, temperature=0.9, max_length=100,)
+>>> gen_tokens = model.generate(
+... input_ids,
+... do_sample=True,
+... temperature=0.9,
+... max_length=100,
+... )
 >>> gen_text = tokenizer.batch_decode(gen_tokens)[0]
 ```
@@ -41,7 +41,7 @@ Examples of use:
 >>> tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
 >>> model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
->>> encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
+>>> encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors="pt")
 >>> outputs = model(encoded_input)
 >>> # HerBERT can also be loaded using AutoTokenizer and AutoModel:
@@ -158,7 +158,9 @@ from PIL import Image
 processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
 image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
-encoding = processor(image, return_tensors="pt") # you can also add all tokenizer parameters here such as padding, truncation
+encoding = processor(
+    image, return_tensors="pt"
+) # you can also add all tokenizer parameters here such as padding, truncation
 print(encoding.keys())
 # dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
 ```
@@ -34,7 +34,7 @@ One can directly plug in the weights of LayoutXLM into a LayoutLMv2 model, like
 ```python
 from transformers import LayoutLMv2Model
-model = LayoutLMv2Model.from_pretrained('microsoft/layoutxlm-base')
+model = LayoutLMv2Model.from_pretrained("microsoft/layoutxlm-base")
 ```
 Note that LayoutXLM has its own tokenizer, based on
@@ -44,7 +44,7 @@ follows:
 ```python
 from transformers import LayoutXLMTokenizer
-tokenizer = LayoutXLMTokenizer.from_pretrained('microsoft/layoutxlm-base')
+tokenizer = LayoutXLMTokenizer.from_pretrained("microsoft/layoutxlm-base")
 ```
 Similar to LayoutLMv2, you can use [`LayoutXLMProcessor`] (which internally applies
@@ -75,8 +75,8 @@ For more information, please refer to the official [paper](https://arxiv.org/pdf
 trained and should be used as follows:
 ```python
-input_ids = tokenizer.encode('This is a sentence from [MASK] training data', return_tensors='pt')
-mlm_labels = tokenizer.encode('This is a sentence from the training data', return_tensors='pt')
+input_ids = tokenizer.encode("This is a sentence from [MASK] training data", return_tensors="pt")
+mlm_labels = tokenizer.encode("This is a sentence from the training data", return_tensors="pt")
 loss = model(input_ids, labels=input_ids, masked_lm_labels=mlm_labels)[0]
 ```
@@ -84,24 +84,27 @@ Example:
 >>> model = LukeModel.from_pretrained("studio-ousia/luke-base")
 >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base")
 # Example 1: Computing the contextualized entity representation corresponding to the entity mention "Beyoncé"
 >>> text = "Beyoncé lives in Los Angeles."
 >>> entity_spans = [(0, 7)] # character-based entity span corresponding to "Beyoncé"
 >>> inputs = tokenizer(text, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt")
 >>> outputs = model(**inputs)
 >>> word_last_hidden_state = outputs.last_hidden_state
 >>> entity_last_hidden_state = outputs.entity_last_hidden_state
 # Example 2: Inputting Wikipedia entities to obtain enriched contextualized representations
->>> entities = ["Beyoncé", "Los Angeles"] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
+>>> entities = [
+... "Beyoncé",
+... "Los Angeles",
+>>> ] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
 >>> entity_spans = [(0, 7), (17, 28)] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
 >>> inputs = tokenizer(text, entities=entities, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt")
 >>> outputs = model(**inputs)
 >>> word_last_hidden_state = outputs.last_hidden_state
 >>> entity_last_hidden_state = outputs.entity_last_hidden_state
 # Example 3: Classifying the relationship between two entities using LukeForEntityPairClassification head model
 >>> model = LukeForEntityPairClassification.from_pretrained("studio-ousia/luke-large-finetuned-tacred")
 >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-large-finetuned-tacred")
 >>> entity_spans = [(0, 7), (17, 28)] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
@@ -49,8 +49,8 @@ examples. To install `sentencepiece` run `pip install sentencepiece`.
 ```python
 from transformers import M2M100Config, M2M100ForConditionalGeneration, M2M100Tokenizer
-model = M2M100ForConditionalGeneration.from_pretrained('facebook/m2m100_418M')
-tokenizer = M2M100Tokenizer.from_pretrained('facebook/m2m100_418M', src_lang="en", tgt_lang="fr")
+model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
+tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="en", tgt_lang="fr")
 src_text = "Life is like a box of chocolates."
 tgt_text = "La vie est comme une boîte de chocolat."
@@ -65,13 +65,14 @@ require 3 character language codes:
 ```python
 >>> from transformers import MarianMTModel, MarianTokenizer
 >>> src_text = [
-... '>>fra<< this is a sentence in english that we want to translate to french',
-... '>>por<< This should go to portuguese',
-... '>>esp<< And this to Spanish'
->>> ]
->>> model_name = 'Helsinki-NLP/opus-mt-en-roa'
+... ">>fra<< this is a sentence in english that we want to translate to french",
+... ">>por<< This should go to portuguese",
+... ">>esp<< And this to Spanish",
+... ]
+>>> model_name = "Helsinki-NLP/opus-mt-en-roa"
 >>> tokenizer = MarianTokenizer.from_pretrained(model_name)
 >>> print(tokenizer.supported_language_codes)
 ['>>zlm_Latn<<', '>>mfe<<', '>>hat<<', '>>pap<<', '>>ast<<', '>>cat<<', '>>ind<<', '>>glg<<', '>>wln<<', '>>spa<<', '>>fra<<', '>>ron<<', '>>por<<', '>>ita<<', '>>oci<<', '>>arg<<', '>>min<<']
@@ -88,11 +89,12 @@ Here is the code to see all available pretrained models on the hub:
 ```python
 from huggingface_hub import list_models
 model_list = list_models()
 org = "Helsinki-NLP"
 model_ids = [x.modelId for x in model_list if x.modelId.startswith(org)]
-suffix = [x.split('/')[1] for x in model_ids]
-old_style_multi_models = [f'{org}/{s}' for s in suffix if s != s.lower()]
+suffix = [x.split("/")[1] for x in model_ids]
+old_style_multi_models = [f"{org}/{s}" for s in suffix if s != s.lower()]
 ```
 ## Old Style Multi-Lingual Models
@@ -100,7 +102,7 @@ old_style_multi_models = [f'{org}/{s}' for s in suffix if s != s.lower()]
 These are the old style multi-lingual models ported from the OPUS-MT-Train repo: and the members of each language
 group:
-```python
+```python no-style
 ['Helsinki-NLP/opus-mt-NORTH_EU-NORTH_EU',
 'Helsinki-NLP/opus-mt-ROMANCE-en',
 'Helsinki-NLP/opus-mt-SCANDINAVIA-SCANDINAVIA',
@@ -129,13 +131,14 @@ Example of translating english to many romance languages, using old-style 2 char
 ```python
 >>> from transformers import MarianMTModel, MarianTokenizer
 >>> src_text = [
-... '>>fr<< this is a sentence in english that we want to translate to french',
-... '>>pt<< This should go to portuguese',
-... '>>es<< And this to Spanish'
->>> ]
->>> model_name = 'Helsinki-NLP/opus-mt-en-ROMANCE'
+... ">>fr<< this is a sentence in english that we want to translate to french",
+... ">>pt<< This should go to portuguese",
+... ">>es<< And this to Spanish",
+... ]
+>>> model_name = "Helsinki-NLP/opus-mt-en-ROMANCE"
 >>> tokenizer = MarianTokenizer.from_pretrained(model_name)
 >>> model = MarianMTModel.from_pretrained(model_name)
@@ -52,7 +52,7 @@ inside the context manager [`~MBartTokenizer.as_target_tokenizer`] to encode tar
 >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
 >>> # forward pass
->>> model(**inputs, labels=batch['labels'])
+>>> model(**inputs, labels=batch["labels"])
 ```
 - Generation
@@ -38,7 +38,7 @@ One can directly plug in the weights of mLUKE into a LUKE model, like so:
 ```python
 from transformers import LukeModel
-model = LukeModel.from_pretrained('studio-ousia/mluke-base')
+model = LukeModel.from_pretrained("studio-ousia/mluke-base")
 ```
 Note that mLUKE has its own tokenizer, [`MLukeTokenizer`]. You can initialize it as follows:
@@ -46,7 +46,7 @@ Note that mLUKE has its own tokenizer, [`MLukeTokenizer`]. You can initialize it
 ```python
 from transformers import MLukeTokenizer
-tokenizer = MLukeTokenizer.from_pretrained('studio-ousia/mluke-base')
+tokenizer = MLukeTokenizer.from_pretrained("studio-ousia/mluke-base")
 ```
 As mLUKE's architecture is equivalent to that of LUKE, one can refer to [LUKE's documentation page](luke) for all
@@ -69,18 +69,22 @@ All the [checkpoints](https://huggingface.co/models?search=pegasus) are fine-tun
 ```python
 >>> from transformers import PegasusForConditionalGeneration, PegasusTokenizer
 >>> import torch
 >>> src_text = [
 ... """ PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."""
->>> ]
->>> model_name = 'google/pegasus-xsum'
->>> device = 'cuda' if torch.cuda.is_available() else 'cpu'
->>> tokenizer = PegasusTokenizer.from_pretrained(model_name)
->>> model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
->>> batch = tokenizer(src_text, truncation=True, padding='longest', return_tensors="pt").to(device)
->>> translated = model.generate(**batch)
->>> tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
->>> assert tgt_text[0] == "California's largest electricity provider has turned off power to hundreds of thousands of customers."
+... ]
+... model_name = "google/pegasus-xsum"
+... device = "cuda" if torch.cuda.is_available() else "cpu"
+... tokenizer = PegasusTokenizer.from_pretrained(model_name)
+... model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
+... batch = tokenizer(src_text, truncation=True, padding="longest", return_tensors="pt").to(device)
+... translated = model.generate(**batch)
+... tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
+... assert (
+... tgt_text[0]
+... == "California's largest electricity provider has turned off power to hundreds of thousands of customers."
+... )
 ```
 ## PegasusConfig
@@ -75,9 +75,9 @@ tensors. After setting up the tensor quantizers, one can use the following examp
 ```python
 >>> # Find the TensorQuantizer and enable calibration
 >>> for name, module in model.named_modules():
->>>     if name.endswith('_input_quantizer'):
->>>         module.enable_calib()
->>>         module.disable_quant() # Use full precision data to calibrate
+...     if name.endswith("_input_quantizer"):
+...         module.enable_calib()
+...         module.disable_quant() # Use full precision data to calibrate
 >>> # Feeding data samples
 >>> model(x)
@@ -85,9 +85,9 @@ tensors. After setting up the tensor quantizers, one can use the following examp
 >>> # Finalize calibration
 >>> for name, module in model.named_modules():
->>>     if name.endswith('_input_quantizer'):
->>>         module.load_calib_amax()
->>>         module.enable_quant()
+...     if name.endswith("_input_quantizer"):
+...         module.load_calib_amax()
+...         module.enable_quant()
 >>> # If running on GPU, it needs to call .cuda() again because new tensors will be created by calibration process
 >>> model.cuda()
@@ -105,6 +105,7 @@ the instructions in [torch.onnx](https://pytorch.org/docs/stable/onnx.html). Exa
 ```python
 >>> from pytorch_quantization.nn import TensorQuantizer
 >>> TensorQuantizer.use_fb_fake_quant = True
 >>> # Load the calibrated model
@@ -134,7 +134,7 @@ easily be trained on sequences as long as 64000 tokens.
 For training, the [`ReformerModelWithLMHead`] should be used as follows:
 ```python
-input_ids = tokenizer.encode('This is a sentence from the training data', return_tensors='pt')
+input_ids = tokenizer.encode("This is a sentence from the training data", return_tensors="pt")
 loss = model(input_ids, labels=input_ids)[0]
 ```
@@ -52,11 +52,13 @@ be installed as follows: `apt install libsndfile1-dev`
 >>> model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
 >>> processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
 >>> def map_to_array(batch):
 ...     speech, _ = sf.read(batch["file"])
 ...     batch["speech"] = speech
 ...     return batch
 >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
 >>> ds = ds.map(map_to_array)
@@ -83,16 +85,22 @@ be installed as follows: `apt install libsndfile1-dev`
 >>> model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-medium-mustc-multilingual-st")
 >>> processor = Speech2TextProcessor.from_pretrained("facebook/s2t-medium-mustc-multilingual-st")
 >>> def map_to_array(batch):
 ...     speech, _ = sf.read(batch["file"])
 ...     batch["speech"] = speech
 ...     return batch
 >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
 >>> ds = ds.map(map_to_array)
 >>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
->>> generated_ids = model.generate(input_ids=inputs["input_features"], attention_mask=inputs["attention_mask], forced_bos_token_id=processor.tokenizer.lang_code_to_id["fr"])
+>>> generated_ids = model.generate(
+... input_ids=inputs["input_features"],
+... attention_mask=inputs["attention_mask"],
+... forced_bos_token_id=processor.tokenizer.lang_code_to_id["fr"],
+... )
 >>> translation = processor.batch_decode(generated_ids)
 ```