Unverified commit e09e54fd, authored by Lysandre Debut and committed by GitHub
Browse files

MT5 should have an autotokenizer (#8743)

* MT5 should have an autotokenizer

* Different configurations should be able to point to the same tokenizers
parent 6fdd0bb2
......@@ -72,6 +72,7 @@ from .configuration_auto import (
MarianConfig,
MBartConfig,
MobileBertConfig,
MT5Config,
OpenAIGPTConfig,
PegasusConfig,
ProphetNetConfig,
......@@ -173,6 +174,7 @@ TOKENIZER_MAPPING = OrderedDict(
[
(RetriBertConfig, (RetriBertTokenizer, RetriBertTokenizerFast)),
(T5Config, (T5Tokenizer, T5TokenizerFast)),
(MT5Config, (T5Tokenizer, T5TokenizerFast)),
(MobileBertConfig, (MobileBertTokenizer, MobileBertTokenizerFast)),
(DistilBertConfig, (DistilBertTokenizer, DistilBertTokenizerFast)),
(AlbertConfig, (AlbertTokenizer, AlbertTokenizerFast)),
......
......@@ -99,21 +99,13 @@ class AutoTokenizerTest(unittest.TestCase):
for mapping in mappings:
mapping = tuple(mapping.items())
for index, (child_config, (child_model_py, child_model_fast)) in enumerate(mapping[1:]):
for parent_config, (parent_model_py, parent_model_fast) in mapping[: index + 1]:
for index, (child_config, _) in enumerate(mapping[1:]):
for parent_config, _ in mapping[: index + 1]:
with self.subTest(
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
):
self.assertFalse(issubclass(child_config, parent_config))
# Check for Slow tokenizer implementation if provided
if child_model_py and parent_model_py:
self.assertFalse(issubclass(child_model_py, parent_model_py))
# Check for Fast tokenizer implementation if provided
if child_model_fast and parent_model_fast:
self.assertFalse(issubclass(child_model_fast, parent_model_fast))
@require_tokenizers
def test_from_pretrained_use_fast_toggle(self):
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment