Unverified Commit 871c31a6 authored by Yih-Dar, committed by GitHub

🔥Rework pipeline testing by removing `PipelineTestCaseMeta` 🚀 (#21516)



* Add PipelineTesterMixin

* remove class PipelineTestCaseMeta

* move validate_test_components

* Add for ViT

* Add to SPECIAL_MODULE_TO_TEST_MAP

* style and quality

* Add feature-extraction

* update

* raise instead of skip

* add tiny_model_summary.json

* more explicit

* skip tasks not in mapping

* add availability check

* Add Copyright

* A way to disable irrelevant tests

* update with main

* remove disable_irrelevant_tests

* skip tests

* better skip message

* better skip message

* Add all pipeline task tests

* revert

* Import PipelineTesterMixin

* subclass test classes with PipelineTesterMixin

* Add pipeline_model_mapping

* Fix import after adding pipeline_model_mapping

* Fix style and quality after adding pipeline_model_mapping

* Fix one more import after adding pipeline_model_mapping

* Fix style and quality after adding pipeline_model_mapping

* Fix test issues

* Fix import requirements

* Fix mapping for MobileViTModelTest

* Update

* Better skip message

* pipeline_model_mapping cannot be None

* Remove some PipelineTesterMixin

* Fix typo

* revert tests_fetcher.py

* update

* rename

* revert

* Remove PipelineTestCaseMeta from ZeroShotAudioClassificationPipelineTests

* style and quality

* test fetcher for all pipeline/model tests

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 4cb5ffa9
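
For context, the core change visible in the diffs below is that each model test class now mixes in `PipelineTesterMixin` and declares a `pipeline_model_mapping` from pipeline task names to model classes, instead of relying on the `PipelineTestCaseMeta` metaclass to generate pipeline tests at class-creation time. A minimal sketch of the new pattern, modeled on the diffs below (the model classes are purely illustrative, and the relative imports assume a test file living under `tests/models/<model>/` in the `transformers` repo):

```python
# Sketch of the new PipelineTesterMixin pattern (illustrative model classes,
# assuming the transformers test-suite layout shown in the diffs below).
import unittest

from transformers import is_torch_available
from transformers.testing_utils import require_torch

from ...test_modeling_common import ModelTesterMixin
from ...test_pipeline_mixin import PipelineTesterMixin

if is_torch_available():
    from transformers import BertForSequenceClassification, BertModel


@require_torch
class MyModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    # Models exercised by the common modeling tests, guarded on framework availability.
    all_model_classes = (BertModel, BertForSequenceClassification) if is_torch_available() else ()

    # New: explicit mapping from pipeline task name to the model class that
    # PipelineTesterMixin should use when building that task's pipeline tests.
    pipeline_model_mapping = (
        {
            "feature-extraction": BertModel,
            "text-classification": BertForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
```

The practical difference is that the tasks a model participates in are now listed explicitly in its own test file rather than being inferred by a metaclass, which is what the per-model `pipeline_model_mapping` additions throughout this diff implement.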
......@@ -31,6 +31,7 @@ from ...test_modeling_common import (
ids_tensor,
random_attention_mask,
)
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -308,12 +309,21 @@ class WavLMModelTester:
@require_torch
class WavLMModelTest(ModelTesterMixin, unittest.TestCase):
class WavLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(WavLMForCTC, WavLMModel, WavLMForAudioFrameClassification, WavLMForSequenceClassification, WavLMForXVector)
if is_torch_available()
else ()
)
pipeline_model_mapping = (
{
"audio-classification": WavLMForSequenceClassification,
"automatic-speech-recognition": WavLMForCTC,
"feature-extraction": WavLMModel,
}
if is_torch_available()
else {}
)
test_pruning = False
test_headmasking = False
......
......@@ -28,6 +28,7 @@ from transformers.utils.import_utils import is_datasets_available
from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_datasets_available():
......@@ -253,9 +254,10 @@ class TFWhisperModelTester:
@require_tf
class TFWhisperModelTest(TFModelTesterMixin, unittest.TestCase):
class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (TFWhisperModel, TFWhisperForConditionalGeneration) if is_tf_available() else ()
all_generative_model_classes = (TFWhisperForConditionalGeneration,) if is_tf_available() else ()
pipeline_model_mapping = {"feature-extraction": TFWhisperModel} if is_tf_available() else {}
is_encoder_decoder = True
fx_compatible = False
test_pruning = False
......
......@@ -31,6 +31,7 @@ from transformers.utils.import_utils import is_datasets_available
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_datasets_available():
......@@ -271,9 +272,14 @@ class WhisperModelTester:
@require_torch
class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (WhisperModel, WhisperForConditionalGeneration) if is_torch_available() else ()
all_generative_model_classes = (WhisperForConditionalGeneration,) if is_torch_available() else ()
pipeline_model_mapping = (
{"automatic-speech-recognition": WhisperForConditionalGeneration, "feature-extraction": WhisperModel}
if is_torch_available()
else {}
)
is_encoder_decoder = True
fx_compatible = False
test_pruning = False
......
......@@ -35,6 +35,7 @@ from ...test_modeling_common import (
ids_tensor,
random_attention_mask,
)
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -506,8 +507,9 @@ class XCLIPModelTester:
@require_torch
class XCLIPModelTest(ModelTesterMixin, unittest.TestCase):
class XCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (XCLIPModel,) if is_torch_available() else ()
pipeline_model_mapping = {"feature-extraction": XCLIPModel} if is_torch_available() else {}
fx_compatible = False
test_head_masking = False
test_pruning = False
......
......@@ -20,6 +20,7 @@ from transformers.testing_utils import require_tf, slow
from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
......@@ -139,9 +140,12 @@ class TFXGLMModelTester:
@require_tf
class TFXGLMModelTest(TFModelTesterMixin, unittest.TestCase):
class TFXGLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (TFXGLMModel, TFXGLMForCausalLM) if is_tf_available() else ()
all_generative_model_classes = (TFXGLMForCausalLM,) if is_tf_available() else ()
pipeline_model_mapping = (
{"feature-extraction": TFXGLMModel, "text-generation": TFXGLMForCausalLM} if is_tf_available() else {}
)
test_onnx = False
test_missing_keys = False
test_pruning = False
......
......@@ -23,6 +23,7 @@ from transformers.testing_utils import require_torch, require_torch_gpu, slow, t
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -294,9 +295,12 @@ class XGLMModelTester:
@require_torch
class XGLMModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
class XGLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (XGLMModel, XGLMForCausalLM) if is_torch_available() else ()
all_generative_model_classes = (XGLMForCausalLM,) if is_torch_available() else ()
pipeline_model_mapping = (
{"feature-extraction": XGLMModel, "text-generation": XGLMForCausalLM} if is_torch_available() else {}
)
fx_compatible = True
test_missing_keys = False
test_pruning = False
......
......@@ -21,6 +21,7 @@ from transformers.testing_utils import require_tf, slow
from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
......@@ -276,7 +277,7 @@ class TFXLMModelTester:
@require_tf
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
class TFXLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFXLMModel,
......@@ -292,6 +293,19 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
all_generative_model_classes = (
(TFXLMWithLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
pipeline_model_mapping = (
{
"feature-extraction": TFXLMModel,
"fill-mask": TFXLMWithLMHeadModel,
"question-answering": TFXLMForQuestionAnsweringSimple,
"text-classification": TFXLMForSequenceClassification,
"text-generation": TFXLMWithLMHeadModel,
"token-classification": TFXLMForTokenClassification,
"zero-shot": TFXLMForSequenceClassification,
}
if is_tf_available()
else {}
)
test_head_masking = False
test_onnx = False
......
......@@ -21,6 +21,7 @@ from transformers.testing_utils import require_torch, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -359,7 +360,7 @@ class XLMModelTester:
@require_torch
class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLMModel,
......@@ -376,6 +377,19 @@ class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
all_generative_model_classes = (
(XLMWithLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
pipeline_model_mapping = (
{
"feature-extraction": XLMModel,
"fill-mask": XLMWithLMHeadModel,
"question-answering": XLMForQuestionAnsweringSimple,
"text-classification": XLMForSequenceClassification,
"text-generation": XLMWithLMHeadModel,
"token-classification": XLMForTokenClassification,
"zero-shot": XLMForSequenceClassification,
}
if is_torch_available()
else {}
)
# XLM has 2 QA models -> need to manually set the correct labels for one of them here
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
......
......@@ -22,6 +22,7 @@ from transformers.testing_utils import require_torch, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -357,7 +358,7 @@ class XLMRobertaXLModelTester:
@require_torch
class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLMRobertaXLForCausalLM,
......@@ -372,6 +373,19 @@ class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Te
else ()
)
all_generative_model_classes = (XLMRobertaXLForCausalLM,) if is_torch_available() else ()
pipeline_model_mapping = (
{
"feature-extraction": XLMRobertaXLModel,
"fill-mask": XLMRobertaXLForMaskedLM,
"question-answering": XLMRobertaXLForQuestionAnswering,
"text-classification": XLMRobertaXLForSequenceClassification,
"text-generation": XLMRobertaXLForCausalLM,
"token-classification": XLMRobertaXLForTokenClassification,
"zero-shot": XLMRobertaXLForSequenceClassification,
}
if is_torch_available()
else {}
)
def setUp(self):
self.model_tester = XLMRobertaXLModelTester(self)
......
......@@ -23,6 +23,7 @@ from transformers.testing_utils import require_tf, slow
from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
......@@ -331,7 +332,7 @@ class TFXLNetModelTester:
@require_tf
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
class TFXLNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFXLNetModel,
......@@ -347,6 +348,18 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
all_generative_model_classes = (
(TFXLNetLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
pipeline_model_mapping = (
{
"feature-extraction": TFXLNetModel,
"question-answering": TFXLNetForQuestionAnsweringSimple,
"text-classification": TFXLNetForSequenceClassification,
"text-generation": TFXLNetLMHeadModel,
"token-classification": TFXLNetForTokenClassification,
"zero-shot": TFXLNetForSequenceClassification,
}
if is_tf_available()
else {}
)
test_head_masking = False
test_onnx = False
......
......@@ -22,6 +22,7 @@ from transformers.testing_utils import require_torch, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -509,7 +510,7 @@ class XLNetModelTester:
@require_torch
class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLNetModel,
......@@ -526,6 +527,18 @@ class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase)
all_generative_model_classes = (
(XLNetLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
pipeline_model_mapping = (
{
"feature-extraction": XLNetModel,
"question-answering": XLNetForQuestionAnsweringSimple,
"text-classification": XLNetForSequenceClassification,
"text-generation": XLNetLMHeadModel,
"token-classification": XLNetForTokenClassification,
"zero-shot": XLNetForSequenceClassification,
}
if is_torch_available()
else {}
)
fx_compatible = False
test_pruning = False
......
......@@ -20,6 +20,7 @@ from transformers.testing_utils import require_sentencepiece, require_tokenizers
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -354,7 +355,7 @@ class XmodModelTester:
@require_torch
class XmodModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
class XmodModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
XmodForCausalLM,
......@@ -369,6 +370,19 @@ class XmodModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
else ()
)
all_generative_model_classes = (XmodForCausalLM,) if is_torch_available() else ()
pipeline_model_mapping = (
{
"feature-extraction": XmodModel,
"fill-mask": XmodForMaskedLM,
"question-answering": XmodForQuestionAnswering,
"text-classification": XmodForSequenceClassification,
"text-generation": XmodForCausalLM,
"token-classification": XmodForTokenClassification,
"zero-shot": XmodForSequenceClassification,
}
if is_torch_available()
else {}
)
def setUp(self):
self.model_tester = XmodModelTester(self)
......
......@@ -24,6 +24,7 @@ from transformers.utils import cached_property, is_torch_available, is_vision_av
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -160,13 +161,16 @@ class YolosModelTester:
@require_torch
class YolosModelTest(ModelTesterMixin, unittest.TestCase):
class YolosModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
"""
Here we also overwrite some of the tests of test_modeling_common.py, as YOLOS does not use input_ids, inputs_embeds,
attention_mask and seq_length.
"""
all_model_classes = (YolosModel, YolosForObjectDetection) if is_torch_available() else ()
pipeline_model_mapping = (
{"feature-extraction": YolosModel, "object-detection": YolosForObjectDetection} if is_torch_available() else {}
)
test_pruning = False
test_resize_embeddings = False
......
......@@ -22,6 +22,7 @@ from transformers.testing_utils import require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
......@@ -280,7 +281,7 @@ class YosoModelTester:
@require_torch
class YosoModelTest(ModelTesterMixin, unittest.TestCase):
class YosoModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
YosoModel,
......@@ -298,6 +299,18 @@ class YosoModelTest(ModelTesterMixin, unittest.TestCase):
test_torchscript = False
all_generative_model_classes = ()
pipeline_model_mapping = (
{
"feature-extraction": YosoModel,
"fill-mask": YosoForMaskedLM,
"question-answering": YosoForQuestionAnswering,
"text-classification": YosoForSequenceClassification,
"token-classification": YosoForTokenClassification,
"zero-shot": YosoForSequenceClassification,
}
if is_torch_available()
else {}
)
def setUp(self):
self.model_tester = YosoModelTester(self)
......
......@@ -20,11 +20,11 @@ from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
from transformers.pipelines import AudioClassificationPipeline, pipeline
from transformers.testing_utils import nested_simplify, require_tf, require_torch, require_torchaudio, slow
from .test_pipelines_common import ANY, PipelineTestCaseMeta
from .test_pipelines_common import ANY
@require_torch
class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
class AudioClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
def get_test_pipeline(self, model, tokenizer, processor):
......
......@@ -42,7 +42,7 @@ from transformers.testing_utils import (
slow,
)
from .test_pipelines_common import ANY, PipelineTestCaseMeta
from .test_pipelines_common import ANY
if is_torch_available():
......@@ -53,7 +53,7 @@ if is_torch_available():
# from .test_pipelines_common import CustomInputPipelineCommonMixin
class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
model_mapping = {
k: v
for k, v in (list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
......
......@@ -12,20 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import logging
import os
import random
import sys
import tempfile
import unittest
from abc import abstractmethod
from pathlib import Path
from unittest import skipIf
import datasets
import numpy as np
import requests
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
from requests.exceptions import HTTPError
......@@ -82,255 +77,6 @@ class ANY:
return f"ANY({', '.join(_type.__name__ for _type in self._types)})"
def is_test_to_skip(test_casse_name, config_class, model_architecture, tokenizer_name, processor_name):
"""Some tests are just not working"""
to_skip = False
if config_class.__name__ == "RoCBertConfig" and test_casse_name in [
"FillMaskPipelineTests",
"FeatureExtractionPipelineTests",
"TextClassificationPipelineTests",
"TokenClassificationPipelineTests",
]:
# Get error: IndexError: index out of range in self.
# `word_shape_file` and `word_pronunciation_file` should be shrunk during tiny model creation,
# otherwise `IndexError` could occur in some embedding layers. Skip for now until this model has
# more usage.
to_skip = True
elif config_class.__name__ in ["LayoutLMv3Config", "LiltConfig"]:
# Get error: ValueError: Words must be of type `List[str]`. Previously, `LayoutLMv3` is not
# used in pipeline tests as it could not find a checkpoint
# TODO: check and fix if possible
to_skip = True
# config/model class we decide to skip
elif config_class.__name__ in ["TapasConfig"]:
# Get error: AssertionError: Table must be of type pd.DataFrame. Also, the tiny model has large
# vocab size as the fast tokenizer could not be converted. Previous, `Tapas` is not used in
# pipeline tests due to the same reason.
# TODO: check and fix if possible
to_skip = True
# TODO: check and fix if possible
if not to_skip and tokenizer_name is not None:
if (
test_casse_name == "QAPipelineTests"
and not tokenizer_name.endswith("Fast")
and config_class.__name__
in [
"FlaubertConfig",
"GPTJConfig",
"LongformerConfig",
"MvpConfig",
"OPTConfig",
"ReformerConfig",
"XLMConfig",
]
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
to_skip = True
elif test_casse_name == "ZeroShotClassificationPipelineTests" and config_class.__name__ in [
"CTRLConfig",
"OpenAIGPTConfig",
]:
# Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
# `CTRLConfig` and `OpenAIGPTConfig` were never used in pipeline tests, either because of a missing
# checkpoint or because a tiny config could not be created
to_skip = True
elif test_casse_name == "TranslationPipelineTests" and config_class.__name__ in [
"M2M100Config",
"PLBartConfig",
]:
# Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
# `M2M100Config` and `PLBartConfig` were never used in pipeline tests: cannot create a simple tokenizer
to_skip = True
elif test_casse_name == "TextGenerationPipelineTests" and config_class.__name__ in [
"ProphetNetConfig",
"TransfoXLConfig",
]:
# Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
# `TransfoXLConfig` and `ProphetNetConfig` were never used in pipeline tests: cannot create a simple
# tokenizer.
to_skip = True
elif test_casse_name == "FillMaskPipelineTests" and config_class.__name__ in [
"FlaubertConfig",
"XLMConfig",
]:
# Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
# `FlaubertConfig` and `TransfoXLConfig` were never used in pipeline tests: cannot create a simple
# tokenizer
to_skip = True
elif test_casse_name == "TextGenerationPipelineTests" and model_architecture.__name__ in [
"TFRoFormerForCausalLM"
]:
# TODO: add `prepare_inputs_for_generation` for `TFRoFormerForCausalLM`
to_skip = True
elif test_casse_name == "QAPipelineTests" and model_architecture.__name__ in ["FNetForQuestionAnswering"]:
# TODO: The change in `base.py` in the PR #21132 (https://github.com/huggingface/transformers/pull/21132)
# fails this test case. Skip for now - a fix for this along with the initial changes in PR #20426 is
# too much. Let `ydshieh` to fix it ASAP once #20426 is merged.
to_skip = True
elif config_class.__name__ == "LayoutLMv2Config" and test_casse_name in [
"QAPipelineTests",
"TextClassificationPipelineTests",
"TokenClassificationPipelineTests",
"ZeroShotClassificationPipelineTests",
]:
# `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
# config. With new tiny model creation, it is available, but we need to fix the failed tests.
to_skip = True
elif test_casse_name == "DocumentQuestionAnsweringPipelineTests" and not tokenizer_name.endswith("Fast"):
# This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
to_skip = True
return to_skip
def validate_test_components(test_case, model, tokenizer, processor):
# TODO: Move this to tiny model creation script
# head-specific (within a model type) necessary changes to the config
# 1. for `BlenderbotForCausalLM`
if model.__class__.__name__ == "BlenderbotForCausalLM":
model.config.encoder_no_repeat_ngram_size = 0
# TODO: Change the tiny model creation script: don't create models with problematic tokenizers
# Avoid `IndexError` in embedding layers
CONFIG_WITHOUT_VOCAB_SIZE = ["CanineConfig"]
if tokenizer is not None:
config_vocab_size = getattr(model.config, "vocab_size", None)
# For CLIP-like models
if config_vocab_size is None and hasattr(model.config, "text_config"):
config_vocab_size = getattr(model.config.text_config, "vocab_size", None)
if config_vocab_size is None and model.config.__class__.__name__ not in CONFIG_WITHOUT_VOCAB_SIZE:
raise ValueError(
"Could not determine `vocab_size` from model configuration while `tokenizer` is not `None`."
)
# TODO: Remove tiny models from the Hub which have problematic tokenizers (but still keep this block)
if config_vocab_size is not None and len(tokenizer) > config_vocab_size:
test_case.skipTest(
f"Ignore {model.__class__.__name__}: `tokenizer` ({tokenizer.__class__.__name__}) has"
f" {len(tokenizer)} tokens which is greater than `config_vocab_size`"
f" ({config_vocab_size}). Something is wrong."
)
class PipelineTestCaseMeta(type):
def __new__(mcs, name, bases, dct):
def gen_test(repo_name, model_architecture, tokenizer_name, processor_name):
@skipIf(
tokenizer_name is None and processor_name is None,
f"Ignore {model_architecture.__name__}: no processor class is provided (tokenizer, image processor,"
" feature extractor, etc)",
)
def test(self):
repo_id = f"hf-internal-testing/{repo_name}"
tokenizer = None
if tokenizer_name is not None:
tokenizer_class = getattr(transformers_module, tokenizer_name)
tokenizer = tokenizer_class.from_pretrained(repo_id)
processor = None
if processor_name is not None:
processor_class = getattr(transformers_module, processor_name)
# If the required packages (like `Pillow`) are not installed, this will fail.
try:
processor = processor_class.from_pretrained(repo_id)
except Exception:
self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")
try:
model = model_architecture.from_pretrained(repo_id)
except Exception:
self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")
# validate
validate_test_components(self, model, tokenizer, processor)
if hasattr(model, "eval"):
model = model.eval()
pipeline, examples = self.get_test_pipeline(model, tokenizer, processor)
if pipeline is None:
# The test can disable itself, but it should be very marginal
# Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
self.skipTest(f"Ignore {model_architecture.__name__}: could not create the pipeline")
self.run_pipeline_test(pipeline, examples)
def run_batch_test(pipeline, examples):
# Need to copy because `Conversation` are stateful
if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
return # No batching for this and it's OK
# 10 examples with batch size 4 means there needs to be an unfinished batch
# which is important for the unbatcher
def data(n):
for _ in range(n):
# Need to copy because Conversation object is mutated
yield copy.deepcopy(random.choice(examples))
out = []
for item in pipeline(data(10), batch_size=4):
out.append(item)
self.assertEqual(len(out), 10)
run_batch_test(pipeline, examples)
return test
# Download tiny model summary (used to avoid requesting from Hub too many times)
url = "https://huggingface.co/datasets/hf-internal-testing/tiny-random-model-summary/raw/main/processor_classes.json"
tiny_model_summary = requests.get(url).json()
for prefix, key in [("pt", "model_mapping"), ("tf", "tf_model_mapping")]:
mapping = dct.get(key, {})
if mapping:
for config_class, model_architectures in mapping.items():
if not isinstance(model_architectures, tuple):
model_architectures = (model_architectures,)
for model_architecture in model_architectures:
model_arch_name = model_architecture.__name__
# Get the canonical name
for _prefix in ["Flax", "TF"]:
if model_arch_name.startswith(_prefix):
model_arch_name = model_arch_name[len(_prefix) :]
break
tokenizer_names = []
processor_names = []
if model_arch_name in tiny_model_summary:
tokenizer_names = tiny_model_summary[model_arch_name]["tokenizer_classes"]
processor_names = tiny_model_summary[model_arch_name]["processor_classes"]
# Adding `None` (if empty) so we can generate tests
tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
processor_names = [None] if len(processor_names) == 0 else processor_names
repo_name = f"tiny-random-{model_arch_name}"
for tokenizer_name in tokenizer_names:
for processor_name in processor_names:
if is_test_to_skip(
name, config_class, model_architecture, tokenizer_name, processor_name
):
continue
test_name = f"test_{prefix}_{config_class.__name__}_{model_architecture.__name__}_{tokenizer_name}_{processor_name}"
dct[test_name] = gen_test(
repo_name, model_architecture, tokenizer_name, processor_name
)
@abstractmethod
def inner(self):
raise NotImplementedError("Not implemented test")
# Force these 2 methods to exist
dct["test_small_model_pt"] = dct.get("test_small_model_pt", inner)
dct["test_small_model_tf"] = dct.get("test_small_model_tf", inner)
return type.__new__(mcs, name, bases, dct)
class CommonPipelineTest(unittest.TestCase):
@require_torch
def test_pipeline_iteration(self):
......
......@@ -31,13 +31,13 @@ from transformers import (
)
from transformers.testing_utils import require_tf, require_torch, slow, torch_device
from .test_pipelines_common import ANY, PipelineTestCaseMeta
from .test_pipelines_common import ANY
DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0
class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
class ConversationalPipelineTests(unittest.TestCase):
model_mapping = dict(
list(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items())
if MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
......
......@@ -19,7 +19,7 @@ from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available,
from transformers.pipelines import DepthEstimationPipeline, pipeline
from transformers.testing_utils import nested_simplify, require_tf, require_timm, require_torch, require_vision, slow
from .test_pipelines_common import ANY, PipelineTestCaseMeta
from .test_pipelines_common import ANY
if is_torch_available():
......@@ -43,7 +43,7 @@ def hashimage(image: Image) -> str:
@require_vision
@require_timm
@require_torch
class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
class DepthEstimationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
def get_test_pipeline(self, model, tokenizer, processor):
......
......@@ -27,7 +27,7 @@ from transformers.testing_utils import (
slow,
)
from .test_pipelines_common import ANY, PipelineTestCaseMeta
from .test_pipelines_common import ANY
if is_vision_available():
......@@ -54,7 +54,7 @@ INVOICE_URL = (
@require_torch
@require_vision
class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING
@require_pytesseract
......