Unverified commit 8581a670, authored by Mehrad Moradshahi, committed by GitHub

[MbartTokenizer] save to sentencepiece.bpe.model (#4335)

parent 18d233d5
@@ -27,8 +27,6 @@ vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-v
 merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt"
 _all_bart_models = ["bart-large", "bart-large-mnli", "bart-large-cnn", "bart-large-xsum"]
 
-VOCAB_FILES_NAMES = {"vocab_file": "sentence.bpe.model"}
-
 
 class BartTokenizer(RobertaTokenizer):
     # merges and vocab same as Roberta
@@ -44,6 +42,6 @@ SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-la
 
 
 class MBartTokenizer(XLMRobertaTokenizer):
-    vocab_files_names = VOCAB_FILES_NAMES
+    vocab_files_names = {"vocab_file": "sentencepiece.bpe.model"}
     max_model_input_sizes = {m: 1024 for m in _all_mbart_models}
     pretrained_vocab_files_map = {"vocab_file": {m: SPM_URL for m in _all_mbart_models}}
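
For context, here is a minimal usage sketch (not part of the commit) of the behavior this change fixes. The checkpoint name facebook/mbart-large-en-ro and the local directory are illustrative assumptions; the point is that save_pretrained now writes the SentencePiece vocabulary under sentencepiece.bpe.model, the same filename the class expects when loading, so a saved tokenizer round-trips cleanly.

from transformers import MBartTokenizer

# Download the pretrained mBART tokenizer (model id assumed for illustration).
tok = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")

# After this commit, the vocab file is saved as sentencepiece.bpe.model
# (previously the class pointed at "sentence.bpe.model", mismatching the
# hosted file named in SPM_URL above).
tok.save_pretrained("./mbart-checkpoint")

# Reloading from the saved directory finds the expected filename.
reloaded = MBartTokenizer.from_pretrained("./mbart-checkpoint")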