Unverified commit 2d920010, authored by Yong woo Song, committed by GitHub

Typo on docstring in ElectraTokenizer (#20192)

* chore: typo on docstring in tokenization_electra

* chore: typo on docstring in tokenization_electra

* update for check copies
parent 4c7e8d09
@@ -89,10 +89,10 @@ def whitespace_tokenize(text):
     return tokens
-# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra
+# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->Electra,BERT->Electra
 class ElectraTokenizer(PreTrainedTokenizer):
     r"""
-    Construct a BERT tokenizer. Based on WordPiece.
+    Construct a Electra tokenizer. Based on WordPiece.
     This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
     this superclass for more information regarding those methods.
@@ -129,7 +129,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
             [issue](https://github.com/huggingface/transformers/issues/328)).
         strip_accents (`bool`, *optional*):
             Whether or not to strip all accents. If this option is not specified, then it will be determined by the
-            value for `lowercase` (as in the original BERT).
+            value for `lowercase` (as in the original Electra).
     """
     vocab_files_names = VOCAB_FILES_NAMES
@@ -226,7 +226,7 @@ class ElectraTokenizer(PreTrainedTokenizer):
     ) -> List[int]:
         """
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-        adding special tokens. A BERT sequence has the following format:
+        adding special tokens. A Electra sequence has the following format:
         - single sequence: `[CLS] X [SEP]`
         - pair of sequences: `[CLS] A [SEP] B [SEP]`
@@ -278,8 +278,8 @@ class ElectraTokenizer(PreTrainedTokenizer):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A BERT sequence
-        pair mask has the following format:
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Electra
+        sequence pair mask has the following format:
         ```
         0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
......
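The docstrings touched by this diff describe how an Electra/BERT-style tokenizer assembles model inputs (`[CLS] A [SEP] B [SEP]`) and the matching token type ids. A minimal standalone sketch of that logic, independent of the actual `transformers` implementation (the `CLS`/`SEP` ids here are illustrative placeholders, not taken from any real vocabulary):

```python
from typing import List, Optional

# Placeholder special-token ids, assumed purely for illustration.
CLS, SEP = 101, 102


def build_inputs_with_special_tokens(
    ids0: List[int], ids1: Optional[List[int]] = None
) -> List[int]:
    # single sequence: [CLS] X [SEP]
    # pair of sequences: [CLS] A [SEP] B [SEP]
    if ids1 is None:
        return [CLS] + ids0 + [SEP]
    return [CLS] + ids0 + [SEP] + ids1 + [SEP]


def create_token_type_ids(
    ids0: List[int], ids1: Optional[List[int]] = None
) -> List[int]:
    # 0s cover "[CLS] A [SEP]", 1s cover "B [SEP]" -- the mask shown
    # in the docstring above.
    first_segment = 1 + len(ids0) + 1  # [CLS] + A + [SEP]
    if ids1 is None:
        return [0] * first_segment
    return [0] * first_segment + [1] * (len(ids1) + 1)  # B + [SEP]
```

For a pair of two and one token ids, this produces a six-element input and the `0 0 0 0 1 1` segment pattern the docstring's mask diagram describes.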