Commit a0d38645 authored by Julien Chaumond

Fix outdated tokenizer doc

parent ea636440
@@ -85,7 +85,7 @@ class XxxTokenizer(PreTrainedTokenizer):
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
     """
     vocab_files_names = VOCAB_FILES_NAMES
...
@@ -113,12 +113,12 @@ class BertTokenizer(PreTrainedTokenizer):
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
         do_basic_tokenize: Whether to do basic tokenization before wordpiece.
         max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
             minimum of this value (if specified) and the underlying BERT model's sequence length.
         never_split: List of tokens which will never be split during tokenization. Only has an effect when
-            do_wordpiece_only=False
+            do_basic_tokenize=True
     """
     vocab_files_names = VOCAB_FILES_NAMES
...
@@ -53,12 +53,12 @@ class DistilBertTokenizer(BertTokenizer):
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
         do_basic_tokenize: Whether to do basic tokenization before wordpiece.
         max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
             minimum of this value (if specified) and the underlying BERT model's sequence length.
         never_split: List of tokens which will never be split during tokenization. Only has an effect when
-            do_wordpiece_only=False
+            do_basic_tokenize=True
     """
     vocab_files_names = VOCAB_FILES_NAMES
...
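
As the corrected docstrings say, do_lower_case and never_split are applied by the basic tokenizer, so they only take effect when do_basic_tokenize=True. A minimal usage sketch of that behaviour, assuming the transformers package and the public bert-base-uncased vocabulary (neither is part of this commit):

from transformers import BertTokenizer

# do_lower_case / never_split are handled by the basic tokenizer, so they
# only have an effect when do_basic_tokenize=True (the default).
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                          do_lower_case=True,
                                          do_basic_tokenize=True)
print(tokenizer.tokenize("Hello World!"))  # lower-cased basic tokenization, then WordPiece

# With do_basic_tokenize=False the text goes straight to WordPiece, and
# do_lower_case / never_split are effectively ignored.
wordpiece_only = BertTokenizer.from_pretrained("bert-base-uncased",
                                               do_basic_tokenize=False)
print(wordpiece_only.tokenize("Hello World!"))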