Unverified commit 6d9e11a1, authored by Lysandre Debut, committed by GitHub
Browse files

S2T + M2M100 should be available in tokenization_auto (#10657)

* S2T + M2M100 should be available in tokenization_auto

* Requires sentencepiece

* SentencePiece for S2T as well :)
parent 602d63f0
...@@ -80,6 +80,7 @@ from .configuration_auto import ( ...@@ -80,6 +80,7 @@ from .configuration_auto import (
LEDConfig, LEDConfig,
LongformerConfig, LongformerConfig,
LxmertConfig, LxmertConfig,
M2M100Config,
MarianConfig, MarianConfig,
MBartConfig, MBartConfig,
MobileBertConfig, MobileBertConfig,
...@@ -92,6 +93,7 @@ from .configuration_auto import ( ...@@ -92,6 +93,7 @@ from .configuration_auto import (
ReformerConfig, ReformerConfig,
RetriBertConfig, RetriBertConfig,
RobertaConfig, RobertaConfig,
Speech2TextConfig,
SqueezeBertConfig, SqueezeBertConfig,
T5Config, T5Config,
TapasConfig, TapasConfig,
...@@ -111,11 +113,13 @@ if is_sentencepiece_available(): ...@@ -111,11 +113,13 @@ if is_sentencepiece_available():
from ..bert_generation.tokenization_bert_generation import BertGenerationTokenizer from ..bert_generation.tokenization_bert_generation import BertGenerationTokenizer
from ..camembert.tokenization_camembert import CamembertTokenizer from ..camembert.tokenization_camembert import CamembertTokenizer
from ..deberta_v2.tokenization_deberta_v2 import DebertaV2Tokenizer from ..deberta_v2.tokenization_deberta_v2 import DebertaV2Tokenizer
from ..m2m_100 import M2M100Tokenizer
from ..marian.tokenization_marian import MarianTokenizer from ..marian.tokenization_marian import MarianTokenizer
from ..mbart.tokenization_mbart import MBartTokenizer from ..mbart.tokenization_mbart import MBartTokenizer
from ..mt5 import MT5Tokenizer from ..mt5 import MT5Tokenizer
from ..pegasus.tokenization_pegasus import PegasusTokenizer from ..pegasus.tokenization_pegasus import PegasusTokenizer
from ..reformer.tokenization_reformer import ReformerTokenizer from ..reformer.tokenization_reformer import ReformerTokenizer
from ..speech_to_text import Speech2TextTokenizer
from ..t5.tokenization_t5 import T5Tokenizer from ..t5.tokenization_t5 import T5Tokenizer
from ..xlm_prophetnet.tokenization_xlm_prophetnet import XLMProphetNetTokenizer from ..xlm_prophetnet.tokenization_xlm_prophetnet import XLMProphetNetTokenizer
from ..xlm_roberta.tokenization_xlm_roberta import XLMRobertaTokenizer from ..xlm_roberta.tokenization_xlm_roberta import XLMRobertaTokenizer
...@@ -135,6 +139,8 @@ else: ...@@ -135,6 +139,8 @@ else:
XLMRobertaTokenizer = None XLMRobertaTokenizer = None
XLNetTokenizer = None XLNetTokenizer = None
XLMProphetNetTokenizer = None XLMProphetNetTokenizer = None
M2M100Tokenizer = None
Speech2TextTokenizer = None
if is_tokenizers_available(): if is_tokenizers_available():
from ..albert.tokenization_albert_fast import AlbertTokenizerFast from ..albert.tokenization_albert_fast import AlbertTokenizerFast
...@@ -197,6 +203,7 @@ else: ...@@ -197,6 +203,7 @@ else:
XLMRobertaTokenizerFast = None XLMRobertaTokenizerFast = None
XLNetTokenizerFast = None XLNetTokenizerFast = None
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -240,6 +247,8 @@ TOKENIZER_MAPPING = OrderedDict( ...@@ -240,6 +247,8 @@ TOKENIZER_MAPPING = OrderedDict(
(DebertaV2Config, (DebertaV2Tokenizer, None)), (DebertaV2Config, (DebertaV2Tokenizer, None)),
(RagConfig, (RagTokenizer, None)), (RagConfig, (RagTokenizer, None)),
(XLMProphetNetConfig, (XLMProphetNetTokenizer, None)), (XLMProphetNetConfig, (XLMProphetNetTokenizer, None)),
(Speech2TextConfig, (Speech2TextTokenizer, None)),
(M2M100Config, (M2M100Tokenizer, None)),
(ProphetNetConfig, (ProphetNetTokenizer, None)), (ProphetNetConfig, (ProphetNetTokenizer, None)),
(MPNetConfig, (MPNetTokenizer, MPNetTokenizerFast)), (MPNetConfig, (MPNetTokenizer, MPNetTokenizerFast)),
(TapasConfig, (TapasTokenizer, None)), (TapasConfig, (TapasTokenizer, None)),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment