"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "7edf8bfafd464de082051b319df2cea338083f36"
Unverified Commit c52b515e authored by Mike Salvatore's avatar Mike Salvatore Committed by GitHub
Browse files

Fix a typo in tokenizer documentation (#28118)

parent a52e180a
@@ -185,7 +185,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
    def _tokenize(self, lyrics):
        """
        Converts a string into a sequence of tokens (string), using the tokenizer. Split in words for word-based
        vocabulary or sub-words for sub-word-based vocabularies (BPE/SentencePieces/WordPieces).
        Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary.
...
@@ -281,7 +281,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
    def _tokenize(self, text, **kwargs):
        """
        Converts a string into a sequence of tokens (string), using the tokenizer.
        """
        if self.do_lower_case:
            text = text.upper()
...
@@ -247,7 +247,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer):
    def _tokenize(self, text, **kwargs):
        """
        Converts a string into a sequence of tokens (string), using the tokenizer.
        """
        # make sure whitespace is stripped to prevent <unk>
...
@@ -540,7 +540,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
    def tokenize(self, text: TextInput, **kwargs) -> List[str]:
        """
        Converts a string into a sequence of tokens, using the tokenizer.
        Split in words for word-based vocabulary or sub-words for sub-word-based vocabularies
        (BPE/SentencePieces/WordPieces). Takes care of added tokens.
@@ -620,7 +620,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
    def _tokenize(self, text, **kwargs):
        """
        Converts a string into a sequence of tokens (string), using the tokenizer. Split in words for word-based
        vocabulary or sub-words for sub-word-based vocabularies (BPE/SentencePieces/WordPieces).
        Do NOT take care of added tokens.
...
@@ -2515,7 +2515,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
    def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> List[str]:
        """
        Converts a string into a sequence of tokens, replacing unknown tokens with the `unk_token`.

        Args:
            text (`str`):
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment