Unverified commit 6b586ed1, authored by Nicolas Patry, committed by GitHub

Move `image-classification` pipeline to new testing (#13272)

- Enforce that `test_small_model_{pt,tf}` methods exist (so small tests check actual values)
- Add support for non-RGB images in the pipeline.
parent 401377e6
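
Note on the non-RGB support: the change below boils down to normalizing whatever PIL loads (grayscale "L", "LA", or "RGBA" images) to RGB before the feature extractor sees it. A minimal standalone sketch of that idea, outside the pipeline (the file name is a hypothetical placeholder, not part of this commit):

    from PIL import Image

    # A grayscale or transparent image has 1, 2 or 4 channels; ViT-style feature
    # extractors expect 3, so the pipeline now converts everything to RGB.
    image = Image.open("some_grayscale_or_rgba_image.png")  # hypothetical local file
    if image.mode != "RGB":
        image = image.convert("RGB")
    print(image.mode)  # always "RGB" from here on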
@@ -61,15 +61,21 @@ class ImageClassificationPipeline(Pipeline):
             if image.startswith("http://") or image.startswith("https://"):
                 # We need to actually check for a real protocol, otherwise it's impossible to use a local file
                 # like http_huggingface_co.png
-                return Image.open(requests.get(image, stream=True).raw)
+                image = Image.open(requests.get(image, stream=True).raw)
             elif os.path.isfile(image):
-                return Image.open(image)
+                image = Image.open(image)
+            else:
+                raise ValueError(
+                    f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path"
+                )
         elif isinstance(image, Image.Image):
-            return image
-
-        raise ValueError(
-            "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image."
-        )
+            image = image
+        else:
+            raise ValueError(
+                "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image."
+            )
+        image = image.convert("RGB")
+        return image
 
     def __call__(self, images: Union[str, List[str], "Image", List["Image"]], top_k=5):
         """
...
@@ -15,6 +15,7 @@
 import importlib
 import logging
 import string
+from abc import abstractmethod
 from functools import lru_cache
 from typing import List, Optional
 from unittest import mock, skipIf
@@ -123,15 +124,18 @@ class PipelineTestCaseMeta(type):
                 model = ModelClass(tiny_config)
                 if hasattr(model, "eval"):
                     model = model.eval()
-                try:
-                    tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint)
-                    if hasattr(model.config, "max_position_embeddings"):
-                        tokenizer.model_max_length = model.config.max_position_embeddings
-                # Rust Panic exception are NOT Exception subclass
-                # Some test tokenizer contain broken vocabs or custom PreTokenizer, so we
-                # provide some default tokenizer and hope for the best.
-                except:  # noqa: E722
-                    self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
+                if tokenizer_class is not None:
+                    try:
+                        tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint)
+                        if hasattr(model.config, "max_position_embeddings"):
+                            tokenizer.model_max_length = model.config.max_position_embeddings
+                    # Rust Panic exception are NOT Exception subclass
+                    # Some test tokenizer contain broken vocabs or custom PreTokenizer, so we
+                    # provide some default tokenizer and hope for the best.
+                    except:  # noqa: E722
+                        self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
+                else:
+                    tokenizer = None
                 feature_extractor = get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config)
                 self.run_pipeline_test(model, tokenizer, feature_extractor)
@@ -149,16 +153,21 @@ class PipelineTestCaseMeta(type):
                    tiny_config = get_tiny_config_from_class(configuration)
                    tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
                    feature_extractor_class = FEATURE_EXTRACTOR_MAPPING.get(configuration, None)
+                    feature_extractor_name = (
+                        feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
+                    )
+                    if not tokenizer_classes:
+                        # We need to test even if there are no tokenizers.
+                        tokenizer_classes = [None]
                    for tokenizer_class in tokenizer_classes:
-                        if tokenizer_class is not None and tokenizer_class.__name__.endswith("Fast"):
-                            tokenizer_name = tokenizer_class.__name__ if tokenizer_class else "notokenizer"
-                            feature_extractor_name = (
-                                feature_extractor_class.__name__
-                                if feature_extractor_class
-                                else "nofeature_extractor"
-                            )
-                            test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
+                        if tokenizer_class is not None:
+                            tokenizer_name = tokenizer_class.__name__
+                        else:
+                            tokenizer_name = "notokenizer"
+                        test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
+
+                        if tokenizer_class is not None or feature_extractor_class is not None:
                            dct[test_name] = gen_test(
                                model_architecture,
                                checkpoint,
@@ -167,6 +176,14 @@ class PipelineTestCaseMeta(type):
                                feature_extractor_class,
                            )
 
+        @abstractmethod
+        def inner(self):
+            raise NotImplementedError("Not implemented test")
+
+        # Force these 2 methods to exist
+        dct["test_small_model_pt"] = dct.get("test_small_model_pt", inner)
+        dct["test_small_model_tf"] = dct.get("test_small_model_tf", inner)
+
         return type.__new__(mcs, name, bases, dct)
...
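
Aside on the `dct.get(...)` lines above: a metaclass builds the class dictionary before the class object exists, so seeding it with a failing placeholder under the required test names is enough to force every subclass to override them. A toy sketch of the same mechanism, independent of the transformers test utilities (all names below are illustrative):

    import unittest


    class RequireSmallModelTests(type):
        """Toy metaclass: inject failing placeholders for mandatory test methods."""

        def __new__(mcs, name, bases, dct):
            def missing(self):
                raise NotImplementedError(f"{name} must implement this small-model test")

            # Used only when the subclass did not define the method itself.
            dct["test_small_model_pt"] = dct.get("test_small_model_pt", missing)
            dct["test_small_model_tf"] = dct.get("test_small_model_tf", missing)
            return super().__new__(mcs, name, bases, dct)


    class ExampleTests(unittest.TestCase, metaclass=RequireSmallModelTests):
        def test_small_model_pt(self):  # defined, so the placeholder is not used
            self.assertTrue(True)

        # test_small_model_tf is not defined, so running it fails with NotImplementedError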
@@ -26,9 +26,10 @@ from transformers import (
     BlenderbotSmallTokenizer,
     Conversation,
     ConversationalPipeline,
+    TFAutoModelForCausalLM,
     pipeline,
 )
-from transformers.testing_utils import is_pipeline_test, require_torch, slow, torch_device
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, slow, torch_device
 
 from .test_pipelines_common import ANY, PipelineTestCaseMeta
@@ -160,6 +161,24 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
         self.assertEqual(result.past_user_inputs[1], "Is it an action movie?")
         self.assertEqual(result.generated_responses[1], "It's a comedy.")
 
+    @require_torch
+    def test_small_model_pt(self):
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+        model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+        conversation = Conversation("hello")
+        output = conversation_agent(conversation)
+        self.assertEqual(output, Conversation(past_user_inputs=["hello"], generated_responses=["Hi"]))
+
+    @require_tf
+    def test_small_model_tf(self):
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+        model = TFAutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+        conversation = Conversation("hello")
+        output = conversation_agent(conversation)
+        self.assertEqual(output, Conversation(past_user_inputs=["hello"], generated_responses=["Hi"]))
+
     @require_torch
     @slow
     def test_integration_torch_conversation_dialogpt_input_ids(self):
...
@@ -14,7 +14,7 @@
 import unittest
 
-from transformers import MODEL_MAPPING, TF_MODEL_MAPPING, FeatureExtractionPipeline, LxmertConfig, pipeline
+from transformers import MODEL_MAPPING, TF_MODEL_MAPPING, CLIPConfig, FeatureExtractionPipeline, LxmertConfig, pipeline
 from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
 
 from .test_pipelines_common import PipelineTestCaseMeta
@@ -62,20 +62,29 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
         return shape
 
     def run_pipeline_test(self, model, tokenizer, feature_extractor):
-        if isinstance(model.config, LxmertConfig):
-            # This is an bimodal model, we need to find a more consistent way
-            # to switch on those models.
+        if tokenizer is None:
+            self.skipTest("No tokenizer")
+            return
+        elif isinstance(model.config, (LxmertConfig, CLIPConfig)):
+            self.skipTest(
+                "This is an Lxmert bimodal model, we need to find a more consistent way to switch on those models."
+            )
+            return
+        elif model.config.is_encoder_decoder:
+            self.skipTest(
+                """encoder_decoder models are trickier for this pipeline.
+                Do we want encoder + decoder inputs to get some featues?
+                Do we want encoder only features ?
+                For now ignore those.
+                """
+            )
             return
         feature_extractor = FeatureExtractionPipeline(
             model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
         )
-        if feature_extractor.model.config.is_encoder_decoder:
-            # encoder_decoder models are trickier for this pipeline.
-            # Do we want encoder + decoder inputs to get some featues?
-            # Do we want encoder only features ?
-            # For now ignore those.
-            return
 
         outputs = feature_extractor("This is a test")
...
@@ -169,8 +169,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
         self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
 
     def run_pipeline_test(self, model, tokenizer, feature_extractor):
-        if tokenizer.mask_token_id is None:
-            self.skipTest("The provided tokenizer has no mask token, (probably reformer)")
+        if tokenizer is None or tokenizer.mask_token_id is None:
+            self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
 
         fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
...
@@ -14,15 +14,18 @@
 import unittest
 
-from transformers import (
-    AutoConfig,
-    AutoFeatureExtractor,
-    AutoModelForImageClassification,
-    PreTrainedTokenizer,
-    is_vision_available,
-)
+from transformers import MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, PreTrainedTokenizer, is_vision_available
 from transformers.pipelines import ImageClassificationPipeline, pipeline
-from transformers.testing_utils import require_torch, require_vision
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_datasets,
+    require_tf,
+    require_torch,
+    require_vision,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
 
 if is_vision_available():
@@ -35,127 +38,115 @@ else:
     pass
 
 
+@is_pipeline_test
 @require_vision
 @require_torch
-class ImageClassificationPipelineTests(unittest.TestCase):
-    pipeline_task = "image-classification"
-    small_models = ["lysandre/tiny-vit-random"]  # Models tested without the @slow decorator
-    valid_inputs = [
-        {"images": "http://images.cocodataset.org/val2017/000000039769.jpg"},
-        {
-            "images": [
-                "http://images.cocodataset.org/val2017/000000039769.jpg",
-                "http://images.cocodataset.org/val2017/000000039769.jpg",
-            ]
-        },
-        {"images": "./tests/fixtures/tests_samples/COCO/000000039769.png"},
-        {
-            "images": [
-                "./tests/fixtures/tests_samples/COCO/000000039769.png",
-                "./tests/fixtures/tests_samples/COCO/000000039769.png",
-            ]
-        },
-        {"images": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")},
-        {
-            "images": [
-                Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
-                Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
-            ]
-        },
-        {
-            "images": [
-                Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
-                "./tests/fixtures/tests_samples/COCO/000000039769.png",
-            ]
-        },
-    ]
-
-    def test_small_model_from_factory(self):
-        for small_model in self.small_models:
-
-            image_classifier = pipeline("image-classification", model=small_model)
-
-            for valid_input in self.valid_inputs:
-                output = image_classifier(**valid_input)
-                top_k = valid_input.get("top_k", 5)
-
-                def assert_valid_pipeline_output(pipeline_output):
-                    self.assertTrue(isinstance(pipeline_output, list))
-                    self.assertEqual(len(pipeline_output), top_k)
-                    for label_result in pipeline_output:
-                        self.assertTrue(isinstance(label_result, dict))
-                        self.assertIn("label", label_result)
-                        self.assertIn("score", label_result)
-
-                if isinstance(valid_input["images"], list):
-                    self.assertEqual(len(valid_input["images"]), len(output))
-                    for individual_output in output:
-                        assert_valid_pipeline_output(individual_output)
-                else:
-                    assert_valid_pipeline_output(output)
-
-    def test_small_model_from_pipeline(self):
-        for small_model in self.small_models:
-
-            model = AutoModelForImageClassification.from_pretrained(small_model)
-            feature_extractor = AutoFeatureExtractor.from_pretrained(small_model)
-            image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
-
-            for valid_input in self.valid_inputs:
-                output = image_classifier(**valid_input)
-                top_k = valid_input.get("top_k", 5)
-
-                def assert_valid_pipeline_output(pipeline_output):
-                    self.assertTrue(isinstance(pipeline_output, list))
-                    self.assertEqual(len(pipeline_output), top_k)
-                    for label_result in pipeline_output:
-                        self.assertTrue(isinstance(label_result, dict))
-                        self.assertIn("label", label_result)
-                        self.assertIn("score", label_result)
-
-                if isinstance(valid_input["images"], list):
-                    # When images are batched, pipeline output is a list of lists of dictionaries
-                    self.assertEqual(len(valid_input["images"]), len(output))
-                    for individual_output in output:
-                        assert_valid_pipeline_output(individual_output)
-                else:
-                    # When images are batched, pipeline output is a list of dictionaries
-                    assert_valid_pipeline_output(output)
+class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
+
+    @require_datasets
+    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
+        outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
+
+        self.assertEqual(
+            outputs,
+            [
+                {"score": ANY(float), "label": ANY(str)},
+                {"score": ANY(float), "label": ANY(str)},
+            ],
+        )
+
+        import datasets
+
+        dataset = datasets.load_dataset("Narsil/image_dummy", "image", split="test")
+
+        # Accepts URL + PIL.Image + lists
+        outputs = image_classifier(
+            [
+                Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                # RGBA
+                dataset[0]["file"],
+                # LA
+                dataset[1]["file"],
+                # L
+                dataset[2]["file"],
+            ]
+        )
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+            ],
+        )
+
+    @require_torch
+    def test_small_model_pt(self):
+        small_model = "lysandre/tiny-vit-random"
+        image_classifier = pipeline("image-classification", model=small_model)
+
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.0015, "label": "chambered nautilus, pearly nautilus, nautilus"},
+                {"score": 0.0015, "label": "pajama, pyjama, pj's, jammies"},
+                {"score": 0.0014, "label": "trench coat"},
+                {"score": 0.0014, "label": "handkerchief, hankie, hanky, hankey"},
+                {"score": 0.0014, "label": "baboon"},
+            ],
+        )
+
+        outputs = image_classifier(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ],
+            top_k=2,
+        )
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [
+                    {"score": 0.0015, "label": "chambered nautilus, pearly nautilus, nautilus"},
+                    {"score": 0.0015, "label": "pajama, pyjama, pj's, jammies"},
+                ],
+                [
+                    {"score": 0.0015, "label": "chambered nautilus, pearly nautilus, nautilus"},
+                    {"score": 0.0015, "label": "pajama, pyjama, pj's, jammies"},
+                ],
+            ],
+        )
+
+    @require_tf
+    @unittest.skip("Image classification is not implemented for TF")
+    def test_small_model_tf(self):
+        pass
 
     def test_custom_tokenizer(self):
         tokenizer = PreTrainedTokenizer()
 
         # Assert that the pipeline can be initialized with a feature extractor that is not in any mapping
-        image_classifier = pipeline("image-classification", model=self.small_models[0], tokenizer=tokenizer)
+        image_classifier = pipeline("image-classification", model="lysandre/tiny-vit-random", tokenizer=tokenizer)
 
         self.assertIs(image_classifier.tokenizer, tokenizer)
-
-    def test_num_labels_inferior_to_topk(self):
-        for small_model in self.small_models:
-            num_labels = 2
-            model = AutoModelForImageClassification.from_config(
-                AutoConfig.from_pretrained(small_model, num_labels=num_labels)
-            )
-            feature_extractor = AutoFeatureExtractor.from_pretrained(small_model)
-            image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
-            for valid_input in self.valid_inputs:
-                output = image_classifier(**valid_input)
-
-                def assert_valid_pipeline_output(pipeline_output):
-                    self.assertTrue(isinstance(pipeline_output, list))
-                    self.assertEqual(len(pipeline_output), num_labels)
-                    for label_result in pipeline_output:
-                        self.assertTrue(isinstance(label_result, dict))
-                        self.assertIn("label", label_result)
-                        self.assertIn("score", label_result)
-
-                if isinstance(valid_input["images"], list):
-                    # When images are batched, pipeline output is a list of lists of dictionaries
-                    self.assertEqual(len(valid_input["images"]), len(output))
-                    for individual_output in output:
-                        assert_valid_pipeline_output(individual_output)
-                else:
-                    # When images are batched, pipeline output is a list of dictionaries
-                    assert_valid_pipeline_output(output)
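
For readers who only want to use what the new tests exercise, a minimal usage sketch of the pipeline after this change (the `google/vit-base-patch16-224` checkpoint is an illustrative choice, not taken from this diff):

    from PIL import Image
    from transformers import pipeline

    # URLs, local paths and PIL.Image objects are all accepted; grayscale or RGBA
    # inputs are converted to RGB internally since this commit.
    classifier = pipeline("image-classification", model="google/vit-base-patch16-224")
    results = classifier(
        [
            "http://images.cocodataset.org/val2017/000000039769.jpg",
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
        ],
        top_k=2,
    )
    # One list of {"label": ..., "score": ...} dicts per input image.
    for result in results:
        print(result)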