Unverified commit 8581a670, authored by Mehrad Moradshahi, committed by GitHub

[MbartTokenizer] save to sentencepiece.bpe.model (#4335)

parent 18d233d5
@@ -27,8 +27,6 @@ vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-v
 merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt"
 _all_bart_models = ["bart-large", "bart-large-mnli", "bart-large-cnn", "bart-large-xsum"]
 
-VOCAB_FILES_NAMES = {"vocab_file": "sentence.bpe.model"}
-
 
 class BartTokenizer(RobertaTokenizer):
     # merges and vocab same as Roberta
@@ -44,6 +42,6 @@ SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-la
 
 
 class MBartTokenizer(XLMRobertaTokenizer):
-    vocab_files_names = VOCAB_FILES_NAMES
+    vocab_files_names = {"vocab_file": "sentencepiece.bpe.model"}
     max_model_input_sizes = {m: 1024 for m in _all_mbart_models}
     pretrained_vocab_files_map = {"vocab_file": {m: SPM_URL for m in _all_mbart_models}}
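
For context, here is a minimal usage sketch (not part of the commit) of the behavior this change fixes. The checkpoint name facebook/mbart-large-en-ro and the local directory are illustrative assumptions; the point is that save_pretrained now writes the SentencePiece vocabulary under sentencepiece.bpe.model, the same filename the class expects when loading, so a saved tokenizer round-trips cleanly.

from transformers import MBartTokenizer

# Download the pretrained mBART tokenizer (model id assumed for illustration).
tok = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")

# After this commit, the vocab file is saved as sentencepiece.bpe.model
# (previously the class pointed at "sentence.bpe.model", mismatching the
# hosted file named in SPM_URL above).
tok.save_pretrained("./mbart-checkpoint")

# Reloading from the saved directory finds the expected filename.
reloaded = MBartTokenizer.from_pretrained("./mbart-checkpoint")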