Unverified Commit a767276f authored by Li-Huai (Allan) Lin, committed by GitHub

Fix generation docstring (#14216)

* Fix generation docstring

* Style
parent e20faa6f
@@ -849,11 +849,11 @@ class GenerationMixin:
>>> outputs = model.generate(input_ids=input_ids, max_length=20, repetition_penalty=1.2)
>>> print("Generated:", tokenizer.decode(outputs[0], skip_special_tokens=True))
- >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
+ >>> tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=False)
>>> model = AutoModelForCausalLM.from_pretrained("gpt2")
>>> input_context = "My cute dog"
>>> # get tokens of words that should not be generated
- >>> bad_words_ids = [tokenizer(bad_word, add_prefix_space=True).input_ids for bad_word in ["idiot", "stupid", "shut up"]]
+ >>> bad_words_ids = tokenizer(["idiot", "stupid", "shut up"], add_prefix_space=True).input_ids
>>> # encode input context
>>> input_ids = tokenizer(input_context, return_tensors="pt").input_ids
>>> # generate sequences without allowing bad_words to be generated
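For reference, a runnable end-to-end version of the corrected snippet might look like the sketch below. It assumes the transformers library and the public "gpt2" checkpoint are available; the closing generate() call is illustrative, since the docstring's own call is not shown in this hunk.

# A minimal sketch of the corrected example (assumes transformers and the "gpt2" checkpoint).
from transformers import AutoModelForCausalLM, AutoTokenizer

# use_fast=False selects the slow GPT2Tokenizer, which accepts
# add_prefix_space as a per-call argument (the fast tokenizer only
# takes it at instantiation time).
tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=False)
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_context = "My cute dog"
# Batch-encode the banned words; the result is a list of token-id lists,
# which is the format generate() expects for bad_words_ids.
bad_words_ids = tokenizer(["idiot", "stupid", "shut up"], add_prefix_space=True).input_ids

input_ids = tokenizer(input_context, return_tensors="pt").input_ids
# Illustrative call: forbid the banned token sequences during generation.
outputs = model.generate(input_ids=input_ids, max_length=20, bad_words_ids=bad_words_ids)
print("Generated:", tokenizer.decode(outputs[0], skip_special_tokens=True))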
@@ -84,8 +84,8 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
>>> tokenizer(" Hello world")['input_ids']
[18435, 995]
- You can get around that behavior by passing ``add_prefix_space=True`` when instantiating this tokenizer or when you
- call it on some text, but since the model was not pretrained this way, it might yield a decrease in performance.
+ You can get around that behavior by passing ``add_prefix_space=True`` when instantiating this tokenizer, but since
+ the model was not pretrained this way, it might yield a decrease in performance.
.. note::
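The behavior described in this docstring can be checked with a short snippet; the following is a sketch under the assumption that the transformers library and the "gpt2" checkpoint are available.

# A minimal sketch of GPT-2's leading-space sensitivity (assumes transformers and the "gpt2" checkpoint).
from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
print(tokenizer("Hello world")["input_ids"])   # [15496, 995] -- word-initial "Hello"
print(tokenizer(" Hello world")["input_ids"])  # [18435, 995] -- " Hello" with a leading space

# Per the updated wording, add_prefix_space is set when instantiating the
# fast tokenizer, not when calling it.
prefix_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", add_prefix_space=True)
print(prefix_tokenizer("Hello world")["input_ids"])  # [18435, 995]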