Fix GPT2 docstring

07bf4307 · Bilal Khan · Lysandre Debut · fa963ecc · 07bf4307
Commit 07bf4307 authored Nov 21, 2019 by Bilal Khan Committed by Lysandre Debut Nov 25, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

transformers/tokenization_gpt2.py transformers/tokenization_gpt2.py +3 -3

No files found.
--- a/transformers/tokenization_gpt2.py
+++ b/transformers/tokenization_gpt2.py
@@ -107,10 +107,10 @@ class GPT2Tokenizer(PreTrainedTokenizer):
    """
    GPT-2 BPE tokenizer. Peculiarities:
        - Byte-level Byte-Pair-Encoding
-        - Requires a space to start the input string => the encoding methods should be called with the
+        - Requires a space to start the input string => the encoding and tokenize methods should be called with the
          ``add_prefix_space`` flag set to ``True``.
-          Otherwise, this tokenizer ``encode`` and ``decode`` method will not conserve
+          Otherwise, this tokenizer's ``encode``, ``decode``, and ``tokenize`` methods will not conserve
-          the absence of a space at the beginning of a string: `tokenizer.decode(tokenizer.encode("Hello")) = " Hello"`
+          the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"`
    """
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP