You need to sign in or sign up before continuing.
Unverified Commit 2d920010 authored by Yong woo Song's avatar Yong woo Song Committed by GitHub
Browse files

Typo on docstring in ElectraTokenizer (#20192)

* chore: typo on docstring in tokenization_electra

* chore: typo on docstring in tokenization_electra

* update for check copies
parent 4c7e8d09
......@@ -89,10 +89,10 @@ def whitespace_tokenize(text):
return tokens
# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra
# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra,BERT->Electra
class ElectraTokenizer(PreTrainedTokenizer):
r"""
Construct a BERT tokenizer. Based on WordPiece.
Construct an Electra tokenizer. Based on WordPiece.
This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
this superclass for more information regarding those methods.
......@@ -129,7 +129,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
[issue](https://github.com/huggingface/transformers/issues/328)).
strip_accents (`bool`, *optional*):
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
value for `lowercase` (as in the original BERT).
value for `lowercase` (as in the original Electra).
"""
vocab_files_names = VOCAB_FILES_NAMES
......@@ -226,7 +226,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
) -> List[int]:
"""
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A BERT sequence has the following format:
adding special tokens. An Electra sequence has the following format:
- single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]`
......@@ -278,8 +278,8 @@ class ElectraTokenizer(PreTrainedTokenizer):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A BERT sequence
pair mask has the following format:
Create a mask from the two sequences passed to be used in a sequence-pair classification task. An Electra
sequence pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment