Unverified commit 99e79054, authored by Nicolas Patry and committed by GitHub
Browse files

Supporting `ImageProcessor` in place of `FeatureExtractor` for pipelines (#20851)



* Fixing the pipeline with image processor.

* Update the slow test.

* Using only the first image processor.

* Include exclusion mechanism for Image processor.

* Do not handle Gitconfig, deemed as a bug.

* Apply suggestions from code review
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Remove `conversational` changes. They are not supposed to be here.

* Address first row of comments.

* Remove OneFormer modifications.
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent efdbad56
...@@ -31,8 +31,10 @@ from huggingface_hub import model_info ...@@ -31,8 +31,10 @@ from huggingface_hub import model_info
from ..configuration_utils import PretrainedConfig from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation from ..models.auto.modeling_auto import AutoModelForDepthEstimation
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer from ..tokenization_utils import PreTrainedTokenizer
...@@ -374,6 +376,7 @@ SUPPORTED_TASKS = { ...@@ -374,6 +376,7 @@ SUPPORTED_TASKS = {
} }
NO_FEATURE_EXTRACTOR_TASKS = set() NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set() NO_TOKENIZER_TASKS = set()
# Those model configs are special, they are generic over their task, meaning # Those model configs are special, they are generic over their task, meaning
# any tokenizer/feature_extractor might be use for a given model so we cannot # any tokenizer/feature_extractor might be use for a given model so we cannot
...@@ -383,6 +386,7 @@ MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig ...@@ -383,6 +386,7 @@ MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig
for task, values in SUPPORTED_TASKS.items(): for task, values in SUPPORTED_TASKS.items():
if values["type"] == "text": if values["type"] == "text":
NO_FEATURE_EXTRACTOR_TASKS.add(task) NO_FEATURE_EXTRACTOR_TASKS.add(task)
NO_IMAGE_PROCESSOR_TASKS.add(task)
elif values["type"] in {"audio", "image", "video"}: elif values["type"] in {"audio", "image", "video"}:
NO_TOKENIZER_TASKS.add(task) NO_TOKENIZER_TASKS.add(task)
elif values["type"] != "multimodal": elif values["type"] != "multimodal":
...@@ -482,6 +486,7 @@ def pipeline( ...@@ -482,6 +486,7 @@ def pipeline(
config: Optional[Union[str, PretrainedConfig]] = None, config: Optional[Union[str, PretrainedConfig]] = None,
tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
framework: Optional[str] = None, framework: Optional[str] = None,
revision: Optional[str] = None, revision: Optional[str] = None,
use_fast: bool = True, use_fast: bool = True,
...@@ -766,6 +771,7 @@ def pipeline( ...@@ -766,6 +771,7 @@ def pipeline(
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None
if ( if (
tokenizer is None tokenizer is None
...@@ -799,6 +805,8 @@ def pipeline( ...@@ -799,6 +805,8 @@ def pipeline(
if task in NO_FEATURE_EXTRACTOR_TASKS: if task in NO_FEATURE_EXTRACTOR_TASKS:
load_feature_extractor = False load_feature_extractor = False
if task in NO_IMAGE_PROCESSOR_TASKS:
load_image_processor = False
if load_tokenizer: if load_tokenizer:
# Try to infer tokenizer from model or config name (if provided as str) # Try to infer tokenizer from model or config name (if provided as str)
...@@ -829,6 +837,27 @@ def pipeline( ...@@ -829,6 +837,27 @@ def pipeline(
tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
) )
if load_image_processor:
# Try to infer image processor from model or config name (if provided as str)
if image_processor is None:
if isinstance(model_name, str):
image_processor = model_name
elif isinstance(config, str):
image_processor = config
else:
# Impossible to guess what is the right image_processor here
raise Exception(
"Impossible to guess which image processor to use. "
"Please provide a PreTrainedImageProcessor class or a path/identifier "
"to a pretrained image processor."
)
# Instantiate image_processor if needed
if isinstance(image_processor, (str, tuple)):
image_processor = AutoImageProcessor.from_pretrained(
image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
)
if load_feature_extractor: if load_feature_extractor:
# Try to infer feature extractor from model or config name (if provided as str) # Try to infer feature extractor from model or config name (if provided as str)
if feature_extractor is None: if feature_extractor is None:
...@@ -897,6 +926,9 @@ def pipeline( ...@@ -897,6 +926,9 @@ def pipeline(
if torch_dtype is not None: if torch_dtype is not None:
kwargs["torch_dtype"] = torch_dtype kwargs["torch_dtype"] = torch_dtype
if image_processor is not None:
kwargs["image_processor"] = image_processor
if device is not None: if device is not None:
kwargs["device"] = device kwargs["device"] = device
......
...@@ -31,6 +31,7 @@ from packaging import version ...@@ -31,6 +31,7 @@ from packaging import version
from ..dynamic_module_utils import custom_object_save from ..dynamic_module_utils import custom_object_save
from ..feature_extraction_utils import PreTrainedFeatureExtractor from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..modelcard import ModelCard from ..modelcard import ModelCard
from ..models.auto.configuration_auto import AutoConfig from ..models.auto.configuration_auto import AutoConfig
from ..tokenization_utils import PreTrainedTokenizer from ..tokenization_utils import PreTrainedTokenizer
...@@ -743,6 +744,7 @@ class Pipeline(_ScikitCompat): ...@@ -743,6 +744,7 @@ class Pipeline(_ScikitCompat):
model: Union["PreTrainedModel", "TFPreTrainedModel"], model: Union["PreTrainedModel", "TFPreTrainedModel"],
tokenizer: Optional[PreTrainedTokenizer] = None, tokenizer: Optional[PreTrainedTokenizer] = None,
feature_extractor: Optional[PreTrainedFeatureExtractor] = None, feature_extractor: Optional[PreTrainedFeatureExtractor] = None,
image_processor: Optional[BaseImageProcessor] = None,
modelcard: Optional[ModelCard] = None, modelcard: Optional[ModelCard] = None,
framework: Optional[str] = None, framework: Optional[str] = None,
task: str = "", task: str = "",
...@@ -759,6 +761,7 @@ class Pipeline(_ScikitCompat): ...@@ -759,6 +761,7 @@ class Pipeline(_ScikitCompat):
self.model = model self.model = model
self.tokenizer = tokenizer self.tokenizer = tokenizer
self.feature_extractor = feature_extractor self.feature_extractor = feature_extractor
self.image_processor = image_processor
self.modelcard = modelcard self.modelcard = modelcard
self.framework = framework self.framework = framework
if is_torch_available() and self.framework == "pt": if is_torch_available() and self.framework == "pt":
...@@ -1012,7 +1015,9 @@ class Pipeline(_ScikitCompat): ...@@ -1012,7 +1015,9 @@ class Pipeline(_ScikitCompat):
if "TOKENIZERS_PARALLELISM" not in os.environ: if "TOKENIZERS_PARALLELISM" not in os.environ:
logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already") logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["TOKENIZERS_PARALLELISM"] = "false"
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor) # TODO hack by collating feature_extractor and image_processor
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn) dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size) model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params) final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
...@@ -1121,7 +1126,10 @@ class ChunkPipeline(Pipeline): ...@@ -1121,7 +1126,10 @@ class ChunkPipeline(Pipeline):
) )
num_workers = 1 num_workers = 1
dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params) dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params)
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
# TODO hack by collating feature_extractor and image_processor
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn) dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
model_iterator = PipelinePackIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size) model_iterator = PipelinePackIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params) final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
......
...@@ -67,6 +67,12 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -67,6 +67,12 @@ class ImageSegmentationPipeline(Pipeline):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
if self.image_processor is None and self.feature_extractor is not None:
# Backward compatible change, if users called
# ImageSegmentationPipeline(.., feature_extractor=MyFeatureExtractor())
# then we should keep working
self.image_processor = self.feature_extractor
if self.framework == "tf": if self.framework == "tf":
raise ValueError(f"The {self.__class__} is only available in PyTorch.") raise ValueError(f"The {self.__class__} is only available in PyTorch.")
...@@ -137,7 +143,7 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -137,7 +143,7 @@ class ImageSegmentationPipeline(Pipeline):
def preprocess(self, image): def preprocess(self, image):
image = load_image(image) image = load_image(image)
target_size = [(image.height, image.width)] target_size = [(image.height, image.width)]
inputs = self.feature_extractor(images=[image], return_tensors="pt") inputs = self.image_processor(images=[image], return_tensors="pt")
inputs["target_size"] = target_size inputs["target_size"] = target_size
return inputs return inputs
...@@ -152,10 +158,10 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -152,10 +158,10 @@ class ImageSegmentationPipeline(Pipeline):
): ):
fn = None fn = None
if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"): if subtask in {"panoptic", None} and hasattr(self.image_processor, "post_process_panoptic_segmentation"):
fn = self.feature_extractor.post_process_panoptic_segmentation fn = self.image_processor.post_process_panoptic_segmentation
elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"): elif subtask in {"instance", None} and hasattr(self.image_processor, "post_process_instance_segmentation"):
fn = self.feature_extractor.post_process_instance_segmentation fn = self.image_processor.post_process_instance_segmentation
if fn is not None: if fn is not None:
outputs = fn( outputs = fn(
...@@ -176,8 +182,8 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -176,8 +182,8 @@ class ImageSegmentationPipeline(Pipeline):
score = segment["score"] score = segment["score"]
annotation.append({"score": score, "label": label, "mask": mask}) annotation.append({"score": score, "label": label, "mask": mask})
elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"): elif subtask in {"semantic", None} and hasattr(self.image_processor, "post_process_semantic_segmentation"):
outputs = self.feature_extractor.post_process_semantic_segmentation( outputs = self.image_processor.post_process_semantic_segmentation(
model_outputs, target_sizes=model_outputs["target_size"] model_outputs, target_sizes=model_outputs["target_size"]
)[0] )[0]
......
...@@ -29,9 +29,6 @@ import numpy as np ...@@ -29,9 +29,6 @@ import numpy as np
from .import_utils import is_flax_available, is_tf_available, is_torch_available, is_torch_fx_proxy from .import_utils import is_flax_available, is_tf_available, is_torch_available, is_torch_fx_proxy
if is_tf_available():
import tensorflow as tf
if is_flax_available(): if is_flax_available():
import jax.numpy as jnp import jax.numpy as jnp
...@@ -437,6 +434,8 @@ def transpose(array, axes=None): ...@@ -437,6 +434,8 @@ def transpose(array, axes=None):
elif is_torch_tensor(array): elif is_torch_tensor(array):
return array.T if axes is None else array.permute(*axes) return array.T if axes is None else array.permute(*axes)
elif is_tf_tensor(array): elif is_tf_tensor(array):
import tensorflow as tf
return tf.transpose(array, perm=axes) return tf.transpose(array, perm=axes)
elif is_jax_tensor(array): elif is_jax_tensor(array):
return jnp.transpose(array, axes=axes) return jnp.transpose(array, axes=axes)
...@@ -454,6 +453,8 @@ def reshape(array, newshape): ...@@ -454,6 +453,8 @@ def reshape(array, newshape):
elif is_torch_tensor(array): elif is_torch_tensor(array):
return array.reshape(*newshape) return array.reshape(*newshape)
elif is_tf_tensor(array): elif is_tf_tensor(array):
import tensorflow as tf
return tf.reshape(array, newshape) return tf.reshape(array, newshape)
elif is_jax_tensor(array): elif is_jax_tensor(array):
return jnp.reshape(array, newshape) return jnp.reshape(array, newshape)
...@@ -471,6 +472,8 @@ def squeeze(array, axis=None): ...@@ -471,6 +472,8 @@ def squeeze(array, axis=None):
elif is_torch_tensor(array): elif is_torch_tensor(array):
return array.squeeze() if axis is None else array.squeeze(dim=axis) return array.squeeze() if axis is None else array.squeeze(dim=axis)
elif is_tf_tensor(array): elif is_tf_tensor(array):
import tensorflow as tf
return tf.squeeze(array, axis=axis) return tf.squeeze(array, axis=axis)
elif is_jax_tensor(array): elif is_jax_tensor(array):
return jnp.squeeze(array, axis=axis) return jnp.squeeze(array, axis=axis)
...@@ -488,6 +491,8 @@ def expand_dims(array, axis): ...@@ -488,6 +491,8 @@ def expand_dims(array, axis):
elif is_torch_tensor(array): elif is_torch_tensor(array):
return array.unsqueeze(dim=axis) return array.unsqueeze(dim=axis)
elif is_tf_tensor(array): elif is_tf_tensor(array):
import tensorflow as tf
return tf.expand_dims(array, axis=axis) return tf.expand_dims(array, axis=axis)
elif is_jax_tensor(array): elif is_jax_tensor(array):
return jnp.expand_dims(array, axis=axis) return jnp.expand_dims(array, axis=axis)
...@@ -504,6 +509,8 @@ def tensor_size(array): ...@@ -504,6 +509,8 @@ def tensor_size(array):
elif is_torch_tensor(array): elif is_torch_tensor(array):
return array.numel() return array.numel()
elif is_tf_tensor(array): elif is_tf_tensor(array):
import tensorflow as tf
return tf.size(array) return tf.size(array)
elif is_jax_tensor(array): elif is_jax_tensor(array):
return array.size return array.size
......
...@@ -27,7 +27,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta ...@@ -27,7 +27,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor) audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
# test with a raw waveform # test with a raw waveform
......
...@@ -61,7 +61,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel ...@@ -61,7 +61,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
+ (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else []) + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
} }
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
if tokenizer is None: if tokenizer is None:
# Side effect of no Fast Tokenizer class for these model, so skipping # Side effect of no Fast Tokenizer class for these model, so skipping
# But the slow tokenizer test should still run as they're quite small # But the slow tokenizer test should still run as they're quite small
......
...@@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_ ...@@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from transformers import ( from transformers import (
FEATURE_EXTRACTOR_MAPPING, FEATURE_EXTRACTOR_MAPPING,
IMAGE_PROCESSOR_MAPPING,
TOKENIZER_MAPPING, TOKENIZER_MAPPING,
AutoFeatureExtractor, AutoFeatureExtractor,
AutoImageProcessor,
AutoModelForSequenceClassification, AutoModelForSequenceClassification,
AutoTokenizer, AutoTokenizer,
DistilBertForSequenceClassification, DistilBertForSequenceClassification,
...@@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_ ...@@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
feature_extractor = None feature_extractor = None
except Exception: except Exception:
feature_extractor = None feature_extractor = None
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
# Audio Spectogram Transformer specific. # Audio Spectogram Transformer specific.
if feature_extractor.__class__.__name__ == "ASTFeatureExtractor": if feature_extractor.__class__.__name__ == "ASTFeatureExtractor":
...@@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_ ...@@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
feature_extractor = feature_extractor.__class__( feature_extractor = feature_extractor.__class__(
feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
) )
# TODO remove this, once those have been moved to `image_processor`.
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
return feature_extractor return feature_extractor
def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class):
    """Return a small image processor for the pipeline tests, or `None`.

    Args:
        checkpoint: Identifier passed to `AutoImageProcessor.from_pretrained`.
        tiny_config: Model config; only its optional `image_size` attribute is read.
        image_processor_class: Fallback class instantiated with no arguments when
            loading from `checkpoint` fails. May be `None`.

    Returns:
        An image processor instance, or `None` when neither loading from the
        checkpoint nor instantiating the fallback class succeeded.
    """
    try:
        image_processor = AutoImageProcessor.from_pretrained(checkpoint)
    except Exception:
        # Deliberate best effort: any loading failure falls back to a
        # default-constructed processor instead of failing the test run.
        try:
            image_processor = image_processor_class() if image_processor_class is not None else None
        except Exception:
            image_processor = None

    if hasattr(tiny_config, "image_size") and image_processor:
        # Rebuild from the class so the processor matches the tiny model's input
        # size; the new instance only receives `size` and `crop_size`.
        image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
    return image_processor
class ANY: class ANY:
def __init__(self, *_types): def __init__(self, *_types):
self._types = _types self._types = _types
...@@ -184,7 +203,9 @@ class ANY: ...@@ -184,7 +203,9 @@ class ANY:
class PipelineTestCaseMeta(type): class PipelineTestCaseMeta(type):
def __new__(mcs, name, bases, dct): def __new__(mcs, name, bases, dct):
def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class): def gen_test(
ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class, image_processor_class
):
@skipIf( @skipIf(
tiny_config is None, tiny_config is None,
"TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling" "TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling"
...@@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type): ...@@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type):
self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer") self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
else: else:
tokenizer = None tokenizer = None
feature_extractor = get_tiny_feature_extractor_from_checkpoint( feature_extractor = get_tiny_feature_extractor_from_checkpoint(
checkpoint, tiny_config, feature_extractor_class checkpoint, tiny_config, feature_extractor_class
) )
if tokenizer is None and feature_extractor is None: image_processor = get_tiny_image_processor_from_checkpoint(
checkpoint, tiny_config, image_processor_class
)
if tokenizer is None and feature_extractor is None and image_processor:
self.skipTest( self.skipTest(
f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with" f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor"
" no FastTokenizer ?)" " (PerceiverConfig with no FastTokenizer ?)"
) )
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor) pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor)
if pipeline is None: if pipeline is None:
# The test can disable itself, but it should be very marginal # The test can disable itself, but it should be very marginal
# Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist) # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
...@@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type): ...@@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type):
feature_extractor_name = ( feature_extractor_name = (
feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor" feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
) )
image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None)
image_processor_name = (
image_processor_class.__name__ if image_processor_class else "noimage_processor"
)
if not tokenizer_classes: if not tokenizer_classes:
# We need to test even if there are no tokenizers. # We need to test even if there are no tokenizers.
tokenizer_classes = [None] tokenizer_classes = [None]
...@@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type): ...@@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type):
else: else:
tokenizer_name = "notokenizer" tokenizer_name = "notokenizer"
test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}" test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}"
if tokenizer_class is not None or feature_extractor_class is not None: if tokenizer_class is not None or feature_extractor_class is not None:
dct[test_name] = gen_test( dct[test_name] = gen_test(
...@@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type): ...@@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type):
tiny_config, tiny_config,
tokenizer_class, tokenizer_class,
feature_extractor_class, feature_extractor_class,
image_processor_class,
) )
@abstractmethod @abstractmethod
......
...@@ -53,7 +53,7 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM ...@@ -53,7 +53,7 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
else [] else []
) )
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer) conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
return conversation_agent, [Conversation("Hi there!")] return conversation_agent, [Conversation("Hi there!")]
......
...@@ -47,7 +47,7 @@ class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCase ...@@ -47,7 +47,7 @@ class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCase
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor) depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor)
return depth_estimator, [ return depth_estimator, [
"./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png",
......
...@@ -59,7 +59,7 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=Pipeli ...@@ -59,7 +59,7 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=Pipeli
@require_pytesseract @require_pytesseract
@require_vision @require_vision
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
dqa_pipeline = pipeline( dqa_pipeline = pipeline(
"document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor "document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
) )
......
...@@ -175,7 +175,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -175,7 +175,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
raise ValueError("We expect lists of floats, nothing else") raise ValueError("We expect lists of floats, nothing else")
return shape return shape
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
if tokenizer is None: if tokenizer is None:
self.skipTest("No tokenizer") self.skipTest("No tokenizer")
return return
......
...@@ -206,7 +206,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): ...@@ -206,7 +206,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
unmasker.tokenizer.pad_token = None unmasker.tokenizer.pad_token = None
self.run_pipeline_test(unmasker, []) self.run_pipeline_test(unmasker, [])
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
if tokenizer is None or tokenizer.mask_token_id is None: if tokenizer is None or tokenizer.mask_token_id is None:
self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)") self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
......
...@@ -49,7 +49,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest ...@@ -49,7 +49,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2) image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
examples = [ examples = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
......
...@@ -26,6 +26,7 @@ from transformers import ( ...@@ -26,6 +26,7 @@ from transformers import (
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
AutoFeatureExtractor, AutoFeatureExtractor,
AutoImageProcessor,
AutoModelForImageSegmentation, AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation, AutoModelForInstanceSegmentation,
DetrForSegmentation, DetrForSegmentation,
...@@ -80,8 +81,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -80,8 +81,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else []) + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
} }
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor) image_segmenter = ImageSegmentationPipeline(
model=model, feature_extractor=feature_extractor, image_processor=image_processor
)
return image_segmenter, [ return image_segmenter, [
"./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png",
...@@ -139,7 +142,11 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -139,7 +142,11 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
"./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png",
] ]
outputs = image_segmenter( outputs = image_segmenter(
batch, threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0, batch_size=batch_size batch,
threshold=0.0,
mask_threshold=0,
overlap_mask_area_threshold=0,
batch_size=batch_size,
) )
self.assertEqual(len(batch), len(outputs)) self.assertEqual(len(batch), len(outputs))
self.assertEqual(len(outputs[0]), n) self.assertEqual(len(outputs[0]), n)
...@@ -188,10 +195,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -188,10 +195,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic" model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
model = AutoModelForImageSegmentation.from_pretrained(model_id) model = AutoModelForImageSegmentation.from_pretrained(model_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) image_processor = AutoImageProcessor.from_pretrained(model_id)
image_segmenter = ImageSegmentationPipeline( image_segmenter = ImageSegmentationPipeline(
model=model, model=model,
feature_extractor=feature_extractor, image_processor=image_processor,
subtask="panoptic", subtask="panoptic",
threshold=0.0, threshold=0.0,
mask_threshold=0.0, mask_threshold=0.0,
......
...@@ -36,7 +36,7 @@ class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta ...@@ -36,7 +36,7 @@ class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor) pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
examples = [ examples = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
......
...@@ -51,7 +51,7 @@ else: ...@@ -51,7 +51,7 @@ else:
class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor) object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"] return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
......
...@@ -31,7 +31,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): ...@@ -31,7 +31,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
if isinstance(model.config, LxmertConfig): if isinstance(model.config, LxmertConfig):
# This is a bimodal model, we need to find a more consistent way # This is a bimodal model, we need to find a more consistent way
# to switch on those models. # to switch on those models.
......
...@@ -34,7 +34,7 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe ...@@ -34,7 +34,7 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe
model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer) summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"] return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
......
...@@ -34,7 +34,7 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest ...@@ -34,7 +34,7 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest
model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer) generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
return generator, ["Something to write", "Something else"] return generator, ["Something to write", "Something else"]
......
...@@ -129,7 +129,7 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC ...@@ -129,7 +129,7 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC
outputs = text_classifier("Birds are a type of animal") outputs = text_classifier("Birds are a type of animal")
self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
def get_test_pipeline(self, model, tokenizer, feature_extractor): def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer) text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
return text_classifier, ["HuggingFace is in", "This is another test"] return text_classifier, ["HuggingFace is in", "This is another test"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment