"tests/models/vscode:/vscode.git/clone" did not exist on "612b2a1a6d407081fe6dc296c19b4dba28abffa8"
Unverified commit ba3264b4, authored by amyeroberts, committed by GitHub
Browse files

Image Feature Extraction pipeline (#28216)



* Draft pipeline

* Fixup

* Fix docstrings

* Update doctest

* Update pipeline_model_mapping

* Update docstring

* Update tests

* Update src/transformers/pipelines/image_feature_extraction.py
Co-authored-by: Omar Sanseviero <osanseviero@gmail.com>

* Fix docstrings - review comments

* Remove pipeline mapping for composite vision models

* Add to pipeline tests

* Remove for flava (multimodal)

* safe pil import

* Add requirements for pipeline run

* Account for super slow efficientnet

* Review comments

* Fix tests

* Swap order of kwargs

* Use build_pipeline_init_args

* Add back FE pipeline for Vilt

* Include image_processor_kwargs in docstring

* Mark test as flaky

* Update TODO

* Update tests/pipelines/test_pipelines_image_feature_extraction.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Add license header

---------
Co-authored-by: Omar Sanseviero <osanseviero@gmail.com>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
parent 7addc934
...@@ -428,7 +428,10 @@ class OwlViTModelTester: ...@@ -428,7 +428,10 @@ class OwlViTModelTester:
class OwlViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): class OwlViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (OwlViTModel,) if is_torch_available() else () all_model_classes = (OwlViTModel,) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": OwlViTModel, "zero-shot-object-detection": OwlViTForObjectDetection} {
"feature-extraction": OwlViTModel,
"zero-shot-object-detection": OwlViTForObjectDetection,
}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -124,7 +124,7 @@ class PoolFormerModelTester: ...@@ -124,7 +124,7 @@ class PoolFormerModelTester:
class PoolFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): class PoolFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (PoolFormerModel, PoolFormerForImageClassification) if is_torch_available() else () all_model_classes = (PoolFormerModel, PoolFormerForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": PoolFormerModel, "image-classification": PoolFormerForImageClassification} {"image-feature-extraction": PoolFormerModel, "image-classification": PoolFormerForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -158,7 +158,7 @@ def prepare_img(): ...@@ -158,7 +158,7 @@ def prepare_img():
class PvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): class PvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (PvtModel, PvtForImageClassification) if is_torch_available() else () all_model_classes = (PvtModel, PvtForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": PvtModel, "image-classification": PvtForImageClassification} {"image-feature-extraction": PvtModel, "image-classification": PvtForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -126,7 +126,7 @@ class RegNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -126,7 +126,7 @@ class RegNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (RegNetModel, RegNetForImageClassification) if is_torch_available() else () all_model_classes = (RegNetModel, RegNetForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": RegNetModel, "image-classification": RegNetForImageClassification} {"image-feature-extraction": RegNetModel, "image-classification": RegNetForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -170,7 +170,7 @@ class ResNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -170,7 +170,7 @@ class ResNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
else () else ()
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": ResNetModel, "image-classification": ResNetForImageClassification} {"image-feature-extraction": ResNetModel, "image-classification": ResNetForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -171,7 +171,7 @@ class SegformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas ...@@ -171,7 +171,7 @@ class SegformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{ {
"feature-extraction": SegformerModel, "image-feature-extraction": SegformerModel,
"image-classification": SegformerForImageClassification, "image-classification": SegformerForImageClassification,
"image-segmentation": SegformerForSemanticSegmentation, "image-segmentation": SegformerForSemanticSegmentation,
} }
......
...@@ -139,7 +139,7 @@ class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC ...@@ -139,7 +139,7 @@ class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
all_model_classes = (SwiftFormerModel, SwiftFormerForImageClassification) if is_torch_available() else () all_model_classes = (SwiftFormerModel, SwiftFormerForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": SwiftFormerModel, "image-classification": SwiftFormerForImageClassification} {"image-feature-extraction": SwiftFormerModel, "image-classification": SwiftFormerForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -232,7 +232,7 @@ class SwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -232,7 +232,7 @@ class SwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
else () else ()
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": SwinModel, "image-classification": SwinForImageClassification} {"image-feature-extraction": SwinModel, "image-classification": SwinForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -162,7 +162,7 @@ class Swin2SRModelTester: ...@@ -162,7 +162,7 @@ class Swin2SRModelTester:
class Swin2SRModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): class Swin2SRModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (Swin2SRModel, Swin2SRForImageSuperResolution) if is_torch_available() else () all_model_classes = (Swin2SRModel, Swin2SRForImageSuperResolution) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": Swin2SRModel, "image-to-image": Swin2SRForImageSuperResolution} {"image-feature-extraction": Swin2SRModel, "image-to-image": Swin2SRForImageSuperResolution}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -217,7 +217,7 @@ class Swinv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -217,7 +217,7 @@ class Swinv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
else () else ()
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": Swinv2Model, "image-classification": Swinv2ForImageClassification} {"image-feature-extraction": Swinv2Model, "image-classification": Swinv2ForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -200,7 +200,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin ...@@ -200,7 +200,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
else () else ()
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": TableTransformerModel, "object-detection": TableTransformerForObjectDetection} {"image-feature-extraction": TableTransformerModel, "object-detection": TableTransformerForObjectDetection}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -228,7 +228,7 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -228,7 +228,7 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
else () else ()
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": ViltModel, "visual-question-answering": ViltForQuestionAnswering} {"image-feature-extraction": ViltModel, "visual-question-answering": ViltForQuestionAnswering}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -193,7 +193,7 @@ class ViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -193,7 +193,7 @@ class ViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
else () else ()
) )
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": ViTModel, "image-classification": ViTForImageClassification} {"image-feature-extraction": ViTModel, "image-classification": ViTForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -156,7 +156,7 @@ class ViTHybridModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas ...@@ -156,7 +156,7 @@ class ViTHybridModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
all_model_classes = (ViTHybridModel, ViTHybridForImageClassification) if is_torch_available() else () all_model_classes = (ViTHybridModel, ViTHybridForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": ViTHybridModel, "image-classification": ViTHybridForImageClassification} {"image-feature-extraction": ViTHybridModel, "image-classification": ViTHybridForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -164,7 +164,7 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -164,7 +164,7 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
""" """
all_model_classes = (ViTMAEModel, ViTMAEForPreTraining) if is_torch_available() else () all_model_classes = (ViTMAEModel, ViTMAEForPreTraining) if is_torch_available() else ()
pipeline_model_mapping = {"feature-extraction": ViTMAEModel} if is_torch_available() else {} pipeline_model_mapping = {"image-feature-extraction": ViTMAEModel} if is_torch_available() else {}
test_pruning = False test_pruning = False
test_torchscript = False test_torchscript = False
......
...@@ -152,7 +152,7 @@ class ViTMSNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -152,7 +152,7 @@ class ViTMSNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (ViTMSNModel, ViTMSNForImageClassification) if is_torch_available() else () all_model_classes = (ViTMSNModel, ViTMSNForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": ViTMSNModel, "image-classification": ViTMSNForImageClassification} {"image-feature-extraction": ViTMSNModel, "image-classification": ViTMSNForImageClassification}
if is_torch_available() if is_torch_available()
else {} else {}
) )
......
...@@ -168,7 +168,9 @@ class YolosModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -168,7 +168,9 @@ class YolosModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (YolosModel, YolosForObjectDetection) if is_torch_available() else () all_model_classes = (YolosModel, YolosForObjectDetection) if is_torch_available() else ()
pipeline_model_mapping = ( pipeline_model_mapping = (
{"feature-extraction": YolosModel, "object-detection": YolosForObjectDetection} if is_torch_available() else {} {"image-feature-extraction": YolosModel, "object-detection": YolosForObjectDetection}
if is_torch_available()
else {}
) )
test_pruning = False test_pruning = False
......
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import pytest
from transformers import (
MODEL_MAPPING,
TF_MODEL_MAPPING,
TOKENIZER_MAPPING,
ImageFeatureExtractionPipeline,
is_tf_available,
is_torch_available,
is_vision_available,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
if is_torch_available():
import torch
if is_tf_available():
import tensorflow as tf
if is_vision_available():
from PIL import Image
# We will verify our results on an image of cute cats
def prepare_img():
    """Open the COCO sample fixture used as input by the tests in this module.

    The path is relative, so it is resolved against the current working
    directory of the test run.

    Returns:
        The object returned by ``Image.open`` for the fixture file.
    """
    return Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
@is_pipeline_test
class ImageFeatureExtractionPipelineTests(unittest.TestCase):
    """Tests for the ``image-feature-extraction`` pipeline.

    The ``test_*`` methods run the pipeline on a tiny random ViT checkpoint with
    both the PyTorch and TensorFlow backends. ``get_test_pipeline`` and
    ``run_pipeline_test`` are not collected by unittest themselves; presumably
    they are driven by the shared pipeline test harness that registers this
    class under the ``"image-feature-extraction"`` task -- confirm against the
    mixin.
    """

    model_mapping = MODEL_MAPPING
    tf_model_mapping = TF_MODEL_MAPPING

    # Checkpoint used by every small-model test in this class.
    _TINY_MODEL = "hf-internal-testing/tiny-random-vit"

    # Expected value of ``nested_simplify(outputs[0][0])`` for the sample image;
    # the same reference vector is asserted for both frameworks.
    # fmt: off
    _EXPECTED_FIRST_ROW = [
        -1.417, -0.392, -1.264, -1.196, 1.648, 0.885, 0.56, -0.606,
        -1.175, 0.823, 1.912, 0.081, -0.053, 1.119, -0.062, -1.757,
        -0.571, 0.075, 0.959, 0.118, 1.201, -0.672, -0.498, 0.364,
        0.937, -1.623, 0.228, 0.19, 1.697, -1.115, 0.583, -0.981,
    ]
    # fmt: on

    def _make_pipeline(self, framework):
        """Build the pipeline under test for ``framework`` (``"pt"`` or ``"tf"``)."""
        return pipeline(task="image-feature-extraction", model=self._TINY_MODEL, framework=framework)

    def _check_image_processor_kwargs(self, framework):
        """Check that ``image_processor_kwargs`` are forwarded for ``framework``."""
        feature_extractor = self._make_pipeline(framework)

        # test with image processor parameters
        image_processor_kwargs = {"size": {"height": 300, "width": 300}}
        img = prepare_img()
        with pytest.raises(ValueError):
            # Image doesn't match model input size
            feature_extractor(img, image_processor_kwargs=image_processor_kwargs)

        # Overriding only the normalisation statistics leaves the input size
        # untouched, so the call is expected to succeed.
        image_processor_kwargs = {"image_mean": [0, 0, 0], "image_std": [1, 1, 1]}
        img = prepare_img()
        outputs = feature_extractor(img, image_processor_kwargs=image_processor_kwargs)
        self.assertEqual(np.squeeze(outputs).shape, (226, 32))

    @require_torch
    def test_small_model_pt(self):
        """The PyTorch pipeline reproduces the reference values for the sample image."""
        feature_extractor = self._make_pipeline("pt")
        img = prepare_img()
        outputs = feature_extractor(img)
        self.assertEqual(nested_simplify(outputs[0][0]), self._EXPECTED_FIRST_ROW)

    @require_tf
    def test_small_model_tf(self):
        """The TensorFlow pipeline reproduces the reference values for the sample image."""
        feature_extractor = self._make_pipeline("tf")
        img = prepare_img()
        outputs = feature_extractor(img)
        self.assertEqual(nested_simplify(outputs[0][0]), self._EXPECTED_FIRST_ROW)

    @require_torch
    def test_image_processing_small_model_pt(self):
        """``image_processor_kwargs`` are honoured by the PyTorch pipeline."""
        self._check_image_processor_kwargs("pt")

    @require_tf
    def test_image_processing_small_model_tf(self):
        """``image_processor_kwargs`` are honoured by the TensorFlow pipeline."""
        self._check_image_processor_kwargs("tf")

    @require_torch
    def test_return_tensors_pt(self):
        """``return_tensors=True`` yields a torch tensor."""
        feature_extractor = self._make_pipeline("pt")
        img = prepare_img()
        outputs = feature_extractor(img, return_tensors=True)
        self.assertTrue(torch.is_tensor(outputs))

    @require_tf
    def test_return_tensors_tf(self):
        """``return_tensors=True`` yields a TensorFlow tensor."""
        feature_extractor = self._make_pipeline("tf")
        img = prepare_img()
        outputs = feature_extractor(img, return_tensors=True)
        self.assertTrue(tf.is_tensor(outputs))

    def get_test_pipeline(self, model, tokenizer, processor):
        """Build the pipeline and the example inputs consumed by ``run_pipeline_test``.

        Args:
            model: Model instance to wrap; its ``config`` decides whether the
                test is skipped.
            tokenizer: Unused here; kept so the signature stays unchanged.
            processor: Image processor handed to the pipeline. ``None`` skips
                the test.

        Returns:
            A ``(pipeline, examples)`` tuple where ``examples`` is a list with
            the sample image twice.
        """
        if processor is None:
            self.skipTest("No image processor")

        elif type(model.config) in TOKENIZER_MAPPING:
            self.skipTest("This is a bimodal model, we need to find a more consistent way to switch on those models.")

        elif model.config.is_encoder_decoder:
            self.skipTest(
                """encoder_decoder models are trickier for this pipeline.
                Do we want encoder + decoder inputs to get some features?
                Do we want encoder only features ?
                For now ignore those.
                """
            )

        feature_extractor = ImageFeatureExtractionPipeline(model=model, image_processor=processor)
        img = prepare_img()
        return feature_extractor, [img, img]

    def run_pipeline_test(self, feature_extractor, examples):
        """Check the output length for a single image and for a list of images.

        Args:
            feature_extractor: Pipeline returned by ``get_test_pipeline``.
            examples: List of images returned by ``get_test_pipeline``.
        """
        imgs = examples

        # A single image yields an output of length 1.
        outputs = feature_extractor(imgs[0])
        self.assertEqual(len(outputs), 1)

        # A list of two images yields an output of length 2.
        outputs = feature_extractor(imgs)
        self.assertEqual(len(outputs), 2)
...@@ -39,6 +39,7 @@ from .pipelines.test_pipelines_document_question_answering import DocumentQuesti ...@@ -39,6 +39,7 @@ from .pipelines.test_pipelines_document_question_answering import DocumentQuesti
from .pipelines.test_pipelines_feature_extraction import FeatureExtractionPipelineTests from .pipelines.test_pipelines_feature_extraction import FeatureExtractionPipelineTests
from .pipelines.test_pipelines_fill_mask import FillMaskPipelineTests from .pipelines.test_pipelines_fill_mask import FillMaskPipelineTests
from .pipelines.test_pipelines_image_classification import ImageClassificationPipelineTests from .pipelines.test_pipelines_image_classification import ImageClassificationPipelineTests
from .pipelines.test_pipelines_image_feature_extraction import ImageFeatureExtractionPipelineTests
from .pipelines.test_pipelines_image_segmentation import ImageSegmentationPipelineTests from .pipelines.test_pipelines_image_segmentation import ImageSegmentationPipelineTests
from .pipelines.test_pipelines_image_to_image import ImageToImagePipelineTests from .pipelines.test_pipelines_image_to_image import ImageToImagePipelineTests
from .pipelines.test_pipelines_image_to_text import ImageToTextPipelineTests from .pipelines.test_pipelines_image_to_text import ImageToTextPipelineTests
...@@ -70,6 +71,7 @@ pipeline_test_mapping = { ...@@ -70,6 +71,7 @@ pipeline_test_mapping = {
"feature-extraction": {"test": FeatureExtractionPipelineTests}, "feature-extraction": {"test": FeatureExtractionPipelineTests},
"fill-mask": {"test": FillMaskPipelineTests}, "fill-mask": {"test": FillMaskPipelineTests},
"image-classification": {"test": ImageClassificationPipelineTests}, "image-classification": {"test": ImageClassificationPipelineTests},
"image-feature-extraction": {"test": ImageFeatureExtractionPipelineTests},
"image-segmentation": {"test": ImageSegmentationPipelineTests}, "image-segmentation": {"test": ImageSegmentationPipelineTests},
"image-to-image": {"test": ImageToImagePipelineTests}, "image-to-image": {"test": ImageToImagePipelineTests},
"image-to-text": {"test": ImageToTextPipelineTests}, "image-to-text": {"test": ImageToTextPipelineTests},
...@@ -374,6 +376,13 @@ class PipelineTesterMixin: ...@@ -374,6 +376,13 @@ class PipelineTesterMixin:
def test_pipeline_image_to_text(self): def test_pipeline_image_to_text(self):
self.run_task_tests(task="image-to-text") self.run_task_tests(task="image-to-text")
@is_pipeline_test
@require_timm
@require_vision
@require_torch
def test_pipeline_image_feature_extraction(self):
self.run_task_tests(task="image-feature-extraction")
@unittest.skip(reason="`run_pipeline_test` is currently not implemented.") @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
@is_pipeline_test @is_pipeline_test
@require_vision @require_vision
......
...@@ -324,6 +324,7 @@ OBJECTS_TO_IGNORE = [ ...@@ -324,6 +324,7 @@ OBJECTS_TO_IGNORE = [
"IdeficsConfig", "IdeficsConfig",
"IdeficsProcessor", "IdeficsProcessor",
"ImageClassificationPipeline", "ImageClassificationPipeline",
"ImageFeatureExtractionPipeline",
"ImageGPTConfig", "ImageGPTConfig",
"ImageSegmentationPipeline", "ImageSegmentationPipeline",
"ImageToImagePipeline", "ImageToImagePipeline",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment