chenpangpang / transformers · Commits

Commit a0d38645
Authored Dec 17, 2019 by Julien Chaumond
Parent: ea636440

Fix outdated tokenizer doc
Showing 3 changed files with 5 additions and 5 deletions
templates/adding_a_new_model/tokenization_xxx.py   +1 -1
transformers/tokenization_bert.py                  +2 -2
transformers/tokenization_distilbert.py            +2 -2
templates/adding_a_new_model/tokenization_xxx.py

@@ -85,7 +85,7 @@ class XxxTokenizer(PreTrainedTokenizer):
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
     """
     vocab_files_names = VOCAB_FILES_NAMES
transformers/tokenization_bert.py

@@ -113,12 +113,12 @@ class BertTokenizer(PreTrainedTokenizer):
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
         do_basic_tokenize: Whether to do basic tokenization before wordpiece.
         max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
             minimum of this value (if specified) and the underlying BERT model's sequence length.
         never_split: List of tokens which will never be split during tokenization. Only has an effect when
-            do_wordpiece_only=False
+            do_basic_tokenize=True
     """
     vocab_files_names = VOCAB_FILES_NAMES
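A minimal sketch (not part of the commit) of the behavior the corrected docstring describes: lowercasing happens inside BasicTokenizer, so do_lower_case only takes effect when do_basic_tokenize=True. The checkpoint name and expected outputs below are illustrative assumptions, not from the diff.

# Sketch assuming the transformers 2.x API that this commit targets.
from transformers import BertTokenizer

# do_basic_tokenize defaults to True, so do_lower_case is honored.
lower = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
print(lower.tokenize("Hello WORLD"))  # expected: ['hello', 'world']

# With do_basic_tokenize=False the text skips BasicTokenizer entirely:
# do_lower_case has no effect and WordPiece sees the raw cased string,
# so out-of-vocabulary cased words likely come back as [UNK].
raw = BertTokenizer.from_pretrained(
    "bert-base-uncased", do_lower_case=True, do_basic_tokenize=False
)
print(raw.tokenize("Hello WORLD"))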
transformers/tokenization_distilbert.py

@@ -53,12 +53,12 @@ class DistilBertTokenizer(BertTokenizer):
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
         do_basic_tokenize: Whether to do basic tokenization before wordpiece.
         max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
             minimum of this value (if specified) and the underlying BERT model's sequence length.
         never_split: List of tokens which will never be split during tokenization. Only has an effect when
-            do_wordpiece_only=False
+            do_basic_tokenize=True
     """
     vocab_files_names = VOCAB_FILES_NAMES
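The same dependency holds for never_split, which is also applied by BasicTokenizer. A hypothetical illustration (the [MARKER] token and checkpoint name are assumptions for the sketch, and basic_tokenizer access reflects the 2.x-era internals):

# Sketch assuming the transformers 2.x API; [MARKER] is a made-up token.
from transformers import DistilBertTokenizer

tok = DistilBertTokenizer.from_pretrained(
    "distilbert-base-uncased",
    never_split=["[MARKER]"],  # hypothetical custom token to protect
)
# BasicTokenizer leaves protected tokens unsplit and un-lowercased;
# expected: ['keep', '[MARKER]', 'intact']
print(tok.basic_tokenizer.tokenize("keep [MARKER] intact"))

With do_basic_tokenize=False there is no basic tokenization pass at all, which is presumably why the commit replaces the stale "do_wordpiece_only=False" wording with "do_basic_tokenize=True".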