Unverified Commit f778edb7 authored by Li-Huai (Allan) Lin, committed by GitHub

Fix typo in BERT tokenization file (#15228)

* Fix typo

* Fix copies
parent 2a5a3849
@@ -151,7 +151,7 @@ class BertTokenizer(PreTrainedTokenizer):
This should likely be deactivated for Japanese (see this
[issue](https://github.com/huggingface/transformers/issues/328)).
- strip_accents: (`bool`, *optional*):
+ strip_accents (`bool`, *optional*):
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
value for `lowercase` (as in the original BERT).
"""
@@ -382,7 +382,7 @@ class BasicTokenizer(object):
WordPieceTokenizer.
Args:
- never_split (`LIst[str]`, *optional*)
+ never_split (`List[str]`, *optional*)
Kept for backward compatibility purposes. Now implemented directly at the base class level (see
[`PreTrainedTokenizer.tokenize`]) List of token not to split.
"""
@@ -147,10 +147,10 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see [this
issue](https://github.com/huggingface/transformers/issues/328)).
- strip_accents: (`bool`, *optional*):
+ strip_accents (`bool`, *optional*):
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
value for `lowercase` (as in the original BERT).
- wordpieces_prefix: (`str`, *optional*, defaults to `"##"`):
+ wordpieces_prefix (`str`, *optional*, defaults to `"##"`):
The prefix for subwords.
"""
@@ -1317,7 +1317,7 @@ class BasicTokenizer(object):
WordPieceTokenizer.
Args:
- never_split (`LIst[str]`, *optional*)
+ never_split (`List[str]`, *optional*)
Kept for backward compatibility purposes. Now implemented directly at the base class level (see
[`PreTrainedTokenizer.tokenize`]) List of token not to split.
"""
@@ -359,7 +359,7 @@ class BasicTokenizer(object):
WordPieceTokenizer.
Args:
- never_split (`LIst[str]`, *optional*)
+ never_split (`List[str]`, *optional*)
Kept for backward compatibility purposes. Now implemented directly at the base class level (see
[`PreTrainedTokenizer.tokenize`]) List of token not to split.
"""
@@ -2024,7 +2024,7 @@ class BasicTokenizer(object):
WordPieceTokenizer.
Args:
- never_split (`LIst[str]`, *optional*)
+ never_split (`List[str]`, *optional*)
Kept for backward compatibility purposes. Now implemented directly at the base class level (see
[`PreTrainedTokenizer.tokenize`]) List of token not to split.
"""