Unverified Commit 7e7f62bf authored by Sylvain Gugger, committed by GitHub

Fix pipeline tests for Roberta-like tokenizers (#19365)

* Fix pipeline tests for Roberta-like tokenizers

* Fix fix
parent bad353ce
@@ -37,8 +37,6 @@ from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
     DistilBertForSequenceClassification,
-    IBertConfig,
-    RobertaConfig,
     TextClassificationPipeline,
     TFAutoModelForSequenceClassification,
     pipeline,
@@ -71,6 +69,16 @@ from test_module.custom_pipeline import PairClassificationPipeline  # noqa E402
 logger = logging.getLogger(__name__)
 
 
+ROBERTA_EMBEDDING_ADJUSMENT_CONFIGS = [
+    "CamembertConfig",
+    "IBertConfig",
+    "LongformerConfig",
+    "MarkupLMConfig",
+    "RobertaConfig",
+    "XLMRobertaConfig",
+]
+
+
 def get_checkpoint_from_architecture(architecture):
     try:
         module = importlib.import_module(architecture.__module__)
@@ -194,7 +202,7 @@ class PipelineTestCaseMeta(type):
                 try:
                     tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint)
                     # XLNet actually defines it as -1.
-                    if isinstance(model.config, (RobertaConfig, IBertConfig)):
+                    if model.config.__class__.__name__ in ROBERTA_EMBEDDING_ADJUSMENT_CONFIGS:
                         tokenizer.model_max_length = model.config.max_position_embeddings - 2
                     elif (
                         hasattr(model.config, "max_position_embeddings")
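For context, the `- 2` adjustment exists because Roberta-style models reserve the first two position ids for the padding offset, so only `max_position_embeddings - 2` positions are actually usable by the tokenizer. Below is a minimal, self-contained sketch of the new name-based check outside the test metaclass; the `adjust_model_max_length` helper and the config values are illustrative, not part of the patched test file.

```python
from transformers import RobertaConfig, RobertaTokenizerFast

# Same list as in the patch: configs whose position embeddings are offset by 2.
ROBERTA_EMBEDDING_ADJUSMENT_CONFIGS = [
    "CamembertConfig",
    "IBertConfig",
    "LongformerConfig",
    "MarkupLMConfig",
    "RobertaConfig",
    "XLMRobertaConfig",
]


def adjust_model_max_length(config, tokenizer):
    # Hypothetical helper mirroring the patched test logic: match on the config
    # class name instead of importing every Roberta-like config class.
    if config.__class__.__name__ in ROBERTA_EMBEDDING_ADJUSMENT_CONFIGS:
        # Roberta-like models reserve two position ids (padding offset), so the
        # usable sequence length is max_position_embeddings - 2.
        tokenizer.model_max_length = config.max_position_embeddings - 2
    elif hasattr(config, "max_position_embeddings"):
        tokenizer.model_max_length = config.max_position_embeddings


config = RobertaConfig(max_position_embeddings=514)  # illustrative value
tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
adjust_model_max_length(config, tokenizer)
print(tokenizer.model_max_length)  # 512
```

Matching on the class name also covers Roberta-like architectures (Camembert, XLM-Roberta, Longformer, MarkupLM, IBert) without importing each config class into the test module.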