Unverified commit 9208f57b, authored by Suraj Patil, committed by GitHub
Browse files

BartTokenizerFast (#4878)

parent 403d3098
...@@ -118,7 +118,7 @@ from .pipelines import ( ...@@ -118,7 +118,7 @@ from .pipelines import (
# Tokenizers # Tokenizers
from .tokenization_albert import AlbertTokenizer from .tokenization_albert import AlbertTokenizer
from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from .tokenization_bart import BartTokenizer, MBartTokenizer from .tokenization_bart import BartTokenizer, BartTokenizerFast, MBartTokenizer
from .tokenization_bert import BasicTokenizer, BertTokenizer, BertTokenizerFast, WordpieceTokenizer from .tokenization_bert import BasicTokenizer, BertTokenizer, BertTokenizerFast, WordpieceTokenizer
from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer
from .tokenization_camembert import CamembertTokenizer from .tokenization_camembert import CamembertTokenizer
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
import logging import logging
from typing import List, Optional from typing import List, Optional
from .tokenization_roberta import RobertaTokenizer from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
from .tokenization_utils import BatchEncoding from .tokenization_utils import BatchEncoding
from .tokenization_xlm_roberta import XLMRobertaTokenizer from .tokenization_xlm_roberta import XLMRobertaTokenizer
...@@ -44,6 +44,15 @@ class BartTokenizer(RobertaTokenizer): ...@@ -44,6 +44,15 @@ class BartTokenizer(RobertaTokenizer):
} }
class BartTokenizerFast(RobertaTokenizerFast):
    """Fast BART tokenizer, defined as a thin subclass of ``RobertaTokenizerFast``.

    No methods are overridden here; the class only supplies two class-level
    lookup tables, both keyed by the model names in ``_all_bart_models``.
    The merges and vocab files are the same as Roberta's.
    """

    # Every listed model name maps to the same maximum input size, 1024.
    max_model_input_sizes = dict.fromkeys(_all_bart_models, 1024)

    # All listed model names share a single vocab URL and a single merges URL
    # (`vocab_url` / `merges_url` are module-level names defined elsewhere).
    pretrained_vocab_files_map = {
        "vocab_file": dict.fromkeys(_all_bart_models, vocab_url),
        "merges_file": dict.fromkeys(_all_bart_models, merges_url),
    }
_all_mbart_models = ["facebook/mbart-large-en-ro"] _all_mbart_models = ["facebook/mbart-large-en-ro"]
SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model" SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment