Unverified Commit 5dbf36bd authored by Yih-Dar, committed by GitHub

Fix ProphetNetTokenizer (#16082)


Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 923c35b5
@@ -102,6 +102,11 @@ class ProphetNetTokenizer(PreTrainedTokenizer):
     pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
 
+    # first name has to correspond to main model input name
+    # to make sure `tokenizer.pad(...)` works correctly
+    # `ProphetNet` doesn't have `token_type_ids` as argument.
+    model_input_names: List[str] = ["input_ids", "attention_mask"]
+
     def __init__(
         self,
         vocab_file,
...
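For context (not part of the commit): `PreTrainedTokenizerBase.pad(...)` pads the key named by the first entry of `model_input_names` and only returns the keys listed there, so the inherited default, which includes `token_type_ids`, would make `tokenizer.pad(...)` emit an input that ProphetNet's `forward` does not accept. Below is a minimal sketch of the fixed behavior; it assumes the public `microsoft/prophetnet-large-uncased` checkpoint is available.

from transformers import ProphetNetTokenizer

tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")

# Encode two sequences of different lengths, then pad them into one batch.
encoded = [tokenizer("Hello"), tokenizer("Hello world, this sequence is longer")]
batch = tokenizer.pad(encoded, padding=True, return_tensors="pt")

# With `model_input_names = ["input_ids", "attention_mask"]`, padding yields
# exactly the inputs ProphetNet accepts: no stray `token_type_ids`.
assert set(batch.keys()) == {"input_ids", "attention_mask"}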