"tests/models/realm/test_tokenization_realm.py" did not exist on "dd4df80f0b77c8f8e07e502298df0121cada9ce8"
Unverified commit e2d22eef, authored by Nicolas Patry, committed by GitHub

Moving feature-extraction pipeline to new testing scheme (#12843)



* Update feature extraction pipeline.

* Leaving 1 small model for an actual-values check.

* Fixes tests

- Better support for tokenizers with no pad token
- Increasing the PegasusModelTester config sizes for pipelines
- Feature-extraction tests are more permissive and don't test multimodal
  models or encoder-decoder models.

* Fixing model loading with incorrect shape (+ model with HEAD).

* Update tests/test_pipelines_common.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Revert modeling_utils modification.

* Some corrections.

* Update tests/test_pipelines_common.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update tests/test_pipelines_feature_extraction.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Syntax.

* Fixing text-classification tests.

* Don't modify this file.
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
@@ -46,6 +46,7 @@ from .configuration_pegasus import PegasusConfig
logger = logging.get_logger(__name__)
_CHECKPOINT_FOR_DOC = "google/pegasus-large"
_CONFIG_FOR_DOC = "PegasusConfig"
_TOKENIZER_FOR_DOC = "PegasusTokenizer"
@@ -134,6 +134,8 @@ def infer_framework_load_model(
        try:
            model = model_class.from_pretrained(model, **kwargs)
            if hasattr(model, "eval"):
                model = model.eval()
            # Stop loading on the first successful load.
            break
        except (OSError, ValueError):
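Aside: the pattern extended by this hunk tries each candidate model class in turn and keeps the first one that loads, switching PyTorch models to eval mode. A minimal, self-contained sketch of the same pattern (the helper name and candidate classes below are illustrative, not the actual `infer_framework_load_model` signature):

```python
# Hedged sketch of the load loop above; the candidate classes are illustrative.
from transformers import AutoModel, AutoModelForSequenceClassification

def load_first_working(model_name):
    for model_class in (AutoModelForSequenceClassification, AutoModel):
        try:
            model = model_class.from_pretrained(model_name)
            if hasattr(model, "eval"):
                # Only PyTorch models expose eval(); TF models skip this.
                model = model.eval()
            # Stop on the first successful load.
            return model
        except (OSError, ValueError):
            continue
    raise ValueError(f"Could not load {model_name}")
```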
@@ -746,7 +748,6 @@ class Pipeline(_ScikitCompat):
        Parse arguments and tokenize
        """
        # Parse arguments
        try:
            inputs = self.tokenizer(
                inputs,
                add_special_tokens=add_special_tokens,
@@ -754,21 +755,26 @@
                return_tensors=self.framework,
                padding=padding,
                truncation=truncation,
            )
        except ValueError:
            # Can be linked to no padding token, if padding_token does not exist we should recover
            inputs = self.tokenizer(
                inputs,
                add_special_tokens=add_special_tokens,
                return_tensors=self.framework,
                padding=False,
                truncation=truncation,
            )
        return inputs
    def __call__(self, *args, **kwargs):
        inputs = self._parse_and_tokenize(*args, **kwargs)
        return self._forward(inputs)
    def __call__(self, inputs, *args, **kwargs):
        try:
            model_inputs = self._parse_and_tokenize(inputs, *args, **kwargs)
            outputs = self._forward(model_inputs)
            return outputs
        except ValueError:
            # XXX: Some tokenizers do NOT have a pad token, hence we cannot run the inference
            # in a batch; instead we run everything sequentially
            if isinstance(inputs, list):
                values = []
                for input_ in inputs:
                    model_input = self._parse_and_tokenize(input_, padding=False, *args, **kwargs)
                    value = self._forward(model_input)
                    values.append(value.squeeze(0))
            else:
                model_input = self._parse_and_tokenize(inputs, padding=False, *args, **kwargs)
                values = self._forward(model_input)
            return values
    def _forward(self, inputs, return_tensors=False):
        """
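The reworked `__call__` catches the `ValueError` that a tokenizer with no pad token raises when asked to pad a batch, and recovers by running each input through the model sequentially with `padding=False`. A hedged usage example (assuming `gpt2`, whose tokenizer defines no pad token):

```python
from transformers import pipeline

# gpt2's tokenizer has no pad token, so padding a batch of two sentences
# raises a ValueError; the pipeline now recovers by running each input
# through the model one at a time.
extractor = pipeline("feature-extraction", model="gpt2")
features = extractor(["A short sentence.", "A slightly longer sentence."])
print(len(features))  # 2 -- one (possibly ragged) feature list per input
```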
@@ -79,4 +79,10 @@ class FeatureExtractionPipeline(Pipeline):
        Return:
            A nested list of :obj:`float`: The features computed by the model.
        """
        return super().__call__(*args, **kwargs).tolist()
        results = super().__call__(*args, **kwargs)
        if isinstance(results, list):
            # Sequential run
            results = [r.tolist() for r in results]
        else:
            results = results.tolist()
        return results
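The branch is needed because a batched run yields a single tensor with a uniform shape, while the sequential fallback yields a plain Python list of per-input tensors (each already squeezed to 2D), possibly with different sequence lengths. A small illustration with made-up shapes:

```python
import torch

# Batched run: one (batch, seq_len, hidden) tensor -> a single .tolist().
batched = torch.zeros(2, 6, 32)
nested = batched.tolist()  # depth-3 nested list

# Sequential fallback: a list of (seq_len, hidden) tensors with ragged
# lengths -> each element needs its own .tolist().
sequential = [torch.zeros(6, 32), torch.zeros(4, 32)]
nested = [t.tolist() for t in sequential]
```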
@@ -117,6 +117,24 @@ class PegasusModelTester:
        inputs_dict = prepare_pegasus_inputs_dict(config, input_ids, decoder_input_ids)
        return config, inputs_dict

    def get_pipeline_config(self):
        return PegasusConfig(
            vocab_size=200,
            d_model=self.hidden_size,
            encoder_layers=self.num_hidden_layers,
            decoder_layers=self.num_hidden_layers,
            encoder_attention_heads=self.num_attention_heads,
            decoder_attention_heads=self.num_attention_heads,
            encoder_ffn_dim=self.intermediate_size,
            decoder_ffn_dim=self.intermediate_size,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=200,
            eos_token_id=self.eos_token_id,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
        )
    def get_config(self):
        return PegasusConfig(
            vocab_size=self.vocab_size,
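`get_pipeline_config` mirrors `get_config` but pins `vocab_size` and `max_position_embeddings` to small fixed values, so the tiny models built for pipeline tests stay compatible with the test tokenizers. Presumably the test harness prefers it when present; a hypothetical sketch of that dispatch (not the actual harness code):

```python
# Hypothetical helper: prefer the pipeline-specific config when the
# model tester defines one, otherwise fall back to the regular config.
def config_for_pipeline_tests(model_tester):
    if hasattr(model_tester, "get_pipeline_config"):
        return model_tester.get_pipeline_config()
    return model_tester.get_config()
```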
@@ -111,6 +111,24 @@ class T5ModelTester:
            lm_labels,
        )

    def get_pipeline_config(self):
        return T5Config(
            vocab_size=166,  # t5 forces 100 extra tokens
            d_model=self.hidden_size,
            d_ff=self.d_ff,
            d_kv=self.hidden_size // self.num_attention_heads,
            num_layers=self.num_hidden_layers,
            num_decoder_layers=self.decoder_layers,
            num_heads=self.num_attention_heads,
            relative_attention_num_buckets=self.relative_attention_num_buckets,
            dropout_rate=self.dropout_rate,
            initializer_factor=self.initializer_factor,
            eos_token_id=self.eos_token_id,
            bos_token_id=self.pad_token_id,
            pad_token_id=self.pad_token_id,
            decoder_start_token_id=self.decoder_start_token_id,
        )
    def get_config(self):
        return T5Config(
            vocab_size=self.vocab_size,
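The `# t5 forces 100 extra tokens` comment refers to `T5Tokenizer`'s 100 sentinel tokens (`<extra_id_0>` … `<extra_id_99>`), which occupy the top ids of the vocabulary; the tiny config's `vocab_size` has to be large enough to cover them. An illustration of the constraint (assuming the stock `t5-small` tokenizer):

```python
from transformers import T5Tokenizer

tok = T5Tokenizer.from_pretrained("t5-small")  # extra_ids=100 by default
sentinel_id = tok.convert_tokens_to_ids("<extra_id_0>")
# Every id the tokenizer can emit, sentinels included, must fit inside
# the model's embedding matrix (vocab_size rows).
assert sentinel_id < tok.vocab_size
```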
@@ -50,7 +50,7 @@ class TFPegasusModelTester:
        intermediate_size=37,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=20,
        max_position_embeddings=40,
        eos_token_id=2,
        pad_token_id=1,
        bos_token_id=0,
@@ -29,7 +29,11 @@ logger = logging.getLogger(__name__)
def get_checkpoint_from_architecture(architecture):
    try:
        module = importlib.import_module(architecture.__module__)
    except ImportError:
        logger.error(f"Ignoring architecture {architecture}")
        return

    if hasattr(module, "_CHECKPOINT_FOR_DOC"):
        return module._CHECKPOINT_FOR_DOC
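This helper maps a model class back to a documented checkpoint by importing the module that defines it and reading the module-level `_CHECKPOINT_FOR_DOC` constant, the same constant added for Pegasus in the first hunk. Roughly:

```python
import importlib

from transformers import PegasusModel

module = importlib.import_module(PegasusModel.__module__)
# Expected to print "google/pegasus-large", per the constant added above.
print(getattr(module, "_CHECKPOINT_FOR_DOC", None))
```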
@@ -46,8 +50,12 @@ def get_tiny_config_from_class(configuration_class):
    model_type = configuration_class.model_type
    camel_case_model_name = configuration_class.__name__.split("Config")[0]

    try:
        module = importlib.import_module(f".test_modeling_{model_type.replace('-', '_')}", package="tests")
        model_tester_class = getattr(module, f"{camel_case_model_name}ModelTester", None)
    except (ImportError, AttributeError):
        logger.error(f"No model tester class for {configuration_class.__name__}")
        return

    if model_tester_class is None:
        logger.warning(f"No model tester class for {configuration_class.__name__}")
@@ -95,6 +103,7 @@ class PipelineTestCaseMeta(type):
                model = model.eval()
                try:
                    tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint)
                    if hasattr(model.config, "max_position_embeddings"):
                        tokenizer.model_max_length = model.config.max_position_embeddings
                # Rust Panic exceptions are NOT Exception subclasses
                # Some test tokenizers contain broken vocabs or custom PreTokenizers, so we
@@ -107,26 +116,20 @@
                return test

        mapping = dct.get("model_mapping", {})
        for prefix, key in [("pt", "model_mapping"), ("tf", "tf_model_mapping")]:
            mapping = dct.get(key, {})
            if mapping:
                for configuration, model_architecture in mapping.items():
                    checkpoint = get_checkpoint_from_architecture(model_architecture)
                    tiny_config = get_tiny_config_from_class(configuration)
                    tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
                    for tokenizer_class in tokenizer_classes:
                        if tokenizer_class is not None and tokenizer_class.__name__.endswith("Fast"):
                            test_name = f"test_pt_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_class.__name__}"
                            dct[test_name] = gen_test(model_architecture, checkpoint, tiny_config, tokenizer_class)
                for configuration, model_architectures in mapping.items():
                    if not isinstance(model_architectures, tuple):
                        model_architectures = (model_architectures,)

        tf_mapping = dct.get("tf_model_mapping", {})
        if tf_mapping:
            for configuration, model_architecture in tf_mapping.items():
                    for model_architecture in model_architectures:
                        checkpoint = get_checkpoint_from_architecture(model_architecture)
                        tiny_config = get_tiny_config_from_class(configuration)
                        tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
                        for tokenizer_class in tokenizer_classes:
                            if tokenizer_class is not None and tokenizer_class.__name__.endswith("Fast"):
                                test_name = f"test_tf_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_class.__name__}"
                                test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_class.__name__}"
                                dct[test_name] = gen_test(model_architecture, checkpoint, tiny_config, tokenizer_class)

        return type.__new__(mcs, name, bases, dct)
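The hunk above interleaves the two removed per-framework blocks with the added unified loop: instead of duplicating the body for `model_mapping` (PyTorch) and `tf_model_mapping` (TensorFlow), the metaclass now iterates over `(prefix, mapping)` pairs and also tolerates mapping values that are tuples of architectures. A stripped-down sketch of the idea, with hypothetical attribute names and no transformers dependency:

```python
import unittest

class PipelineMetaSketch(type):
    """Stamp one test method per (framework, architecture) pair onto the class."""

    def __new__(mcs, name, bases, dct):
        def gen_test(prefix, architecture):
            def test(self):
                self.assertTrue(callable(architecture))
            return test

        for prefix, key in [("pt", "pt_archs"), ("tf", "tf_archs")]:
            for architecture in dct.get(key, ()):
                test_name = f"test_{prefix}_{architecture.__name__}"
                dct[test_name] = gen_test(prefix, architecture)
        return type.__new__(mcs, name, bases, dct)

class ExampleTests(unittest.TestCase, metaclass=PipelineMetaSketch):
    pt_archs = (dict, list)  # stand-ins for model classes
# -> generates test_pt_dict and test_pt_list automatically
```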
@@ -14,13 +14,72 @@
import unittest

from .test_pipelines_common import MonoInputPipelineCommonMixin
from transformers import MODEL_MAPPING, TF_MODEL_MAPPING, FeatureExtractionPipeline, LxmertConfig, pipeline
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch

from .test_pipelines_common import PipelineTestCaseMeta


class FeatureExtractionPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
    pipeline_task = "feature-extraction"
    small_models = ["sshleifer/tiny-distilbert-base-cased"]  # Default model - Models tested without the @slow decorator
    large_models = [None]  # Models tested with the @slow decorator
    mandatory_keys = {}  # Keys which should be in the output
@is_pipeline_test
class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_MAPPING
    tf_model_mapping = TF_MODEL_MAPPING

    @require_torch
    def test_small_model_pt(self):
        feature_extractor = pipeline(
            task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="pt"
        )
        outputs = feature_extractor("This is a test")
        self.assertEqual(
            nested_simplify(outputs),
[[[-0.454, 0.966, 0.619, 0.262, 0.669, -0.661, -0.066, -0.513, -0.768, -0.177, 1.771, -0.665, -0.649, 0.219, 0.236, -0.375, 1.155, -1.07, 0.208, -0.799, 1.065, -1.223, 0.554, 1.274, 0.458, 2.292, -0.481, -0.928, -2.469, -1.692, 0.182, 1.06], [-0.187, -1.277, 0.849, -0.439, -0.967, -1.347, 1.063, 0.469, 1.086, -1.253, 0.349, 0.057, 1.031, -1.903, -0.432, -1.377, 0.379, 0.733, -1.043, 1.307, 0.865, 0.229, 1.373, 1.671, -0.285, 0.599, -1.418, -1.179, -0.369, 1.039, -0.705, 1.082], [-1.735, 1.102, 0.398, -0.245, 1.452, 0.46, -1.734, -0.746, 1.831, 0.562, 1.464, -0.342, -0.619, -0.455, 0.127, -1.209, -0.686, -0.395, -0.316, 2.467, -0.379, 0.328, 0.639, 0.4, -1.097, -0.096, 0.397, -0.806, -1.621, 1.127, -0.345, 0.074], [0.296, -0.638, 1.938, -0.151, -1.19, 1.445, 1.318, 0.711, -0.125, 0.127, -2.179, 0.481, -1.019, 1.178, 0.318, 1.858, -1.646, 0.185, -0.072, -0.979, 0.82, -1.374, 0.836, -1.019, 0.043, -0.156, -0.095, 0.641, -0.195, -0.076, -1.554, 0.275], [-0.266, 0.971, 0.745, -0.37, 1.42, -0.5, -0.53, 0.061, 1.311, -0.1, 1.796, 0.53, -0.739, -0.325, 0.28, -1.72, 0.382, -1.118, 0.442, 1.84, -2.497, 1.003, -0.788, -0.224, -0.604, -1.259, -0.475, 1.18, -1.356, 0.695, 0.201, 0.016], [-0.618, -1.495, -0.67, -0.106, -1.265, -0.51, -1.752, 1.018, 0.674, 0.181, 0.297, 0.479, -0.185, 0.081, -2.44, -0.239, 1.081, -1.38, 0.679, 0.878, 1.336, -1.347, 0.969, -0.847, 0.293, 0.476, 1.647, -0.641, 0.66, 1.236, 0.761, 0.751]]]) # fmt: skip
    @require_tf
    def test_small_model_tf(self):
        feature_extractor = pipeline(
            task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="tf"
        )
        outputs = feature_extractor("This is a test")
        self.assertEqual(
            nested_simplify(outputs),
[[[-0.454, 0.966, 0.619, 0.262, 0.669, -0.661, -0.066, -0.513, -0.768, -0.177, 1.771, -0.665, -0.649, 0.219, 0.236, -0.375, 1.155, -1.07, 0.208, -0.799, 1.065, -1.223, 0.554, 1.274, 0.458, 2.292, -0.481, -0.928, -2.469, -1.692, 0.182, 1.06], [-0.187, -1.277, 0.849, -0.439, -0.967, -1.347, 1.063, 0.469, 1.086, -1.253, 0.349, 0.057, 1.031, -1.903, -0.432, -1.377, 0.379, 0.733, -1.043, 1.307, 0.865, 0.229, 1.373, 1.671, -0.285, 0.599, -1.418, -1.179, -0.369, 1.039, -0.705, 1.082], [-1.735, 1.102, 0.398, -0.245, 1.452, 0.46, -1.734, -0.746, 1.831, 0.562, 1.464, -0.342, -0.619, -0.455, 0.127, -1.209, -0.686, -0.395, -0.316, 2.467, -0.379, 0.328, 0.639, 0.4, -1.097, -0.096, 0.397, -0.806, -1.621, 1.127, -0.345, 0.074], [0.296, -0.638, 1.938, -0.151, -1.19, 1.445, 1.318, 0.711, -0.125, 0.127, -2.179, 0.481, -1.019, 1.178, 0.318, 1.858, -1.646, 0.185, -0.072, -0.979, 0.82, -1.374, 0.836, -1.019, 0.043, -0.156, -0.095, 0.641, -0.195, -0.076, -1.554, 0.275], [-0.266, 0.971, 0.745, -0.37, 1.42, -0.5, -0.53, 0.061, 1.311, -0.1, 1.796, 0.53, -0.739, -0.325, 0.28, -1.72, 0.382, -1.118, 0.442, 1.84, -2.497, 1.003, -0.788, -0.224, -0.604, -1.259, -0.475, 1.18, -1.356, 0.695, 0.201, 0.016], [-0.618, -1.495, -0.67, -0.106, -1.265, -0.51, -1.752, 1.018, 0.674, 0.181, 0.297, 0.479, -0.185, 0.081, -2.44, -0.239, 1.081, -1.38, 0.679, 0.878, 1.336, -1.347, 0.969, -0.847, 0.293, 0.476, 1.647, -0.641, 0.66, 1.236, 0.761, 0.751]]]) # fmt: skip
    def get_shape(self, input_, shape=None):
        if shape is None:
            shape = []
        if isinstance(input_, list):
            subshapes = [self.get_shape(in_, shape) for in_ in input_]
            if all(s == 0 for s in subshapes):
                shape.append(len(input_))
            else:
                subshape = subshapes[0]
                shape = [len(input_), *subshape]
        elif isinstance(input_, float):
            return 0
        else:
            raise ValueError("We expect lists of floats, nothing else")
        return shape
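Note that the tests below only consume `shape[0]`. The helper mutates the shared `shape` argument once per leaf list during recursion, so the trailing entries accumulate per-leaf appends; only the leading batch dimension is reliable. For instance (a snippet assumed to run inside the test class):

```python
# Three rows of two floats: shape[0] is the row count; the trailing
# entries reflect the per-leaf appends rather than a clean [3, 2].
shape = self.get_shape([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
assert shape[0] == 3
```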
    def run_pipeline_test(self, model, tokenizer):
        if isinstance(model.config, LxmertConfig):
            # This is a bimodal model, and we need to find a more consistent way
            # to switch on those models.
            return
        feature_extractor = FeatureExtractionPipeline(model=model, tokenizer=tokenizer)

        if feature_extractor.model.config.is_encoder_decoder:
            # Encoder-decoder models are trickier for this pipeline.
            # Do we want encoder + decoder inputs to get some features?
            # Do we want encoder-only features?
            # For now, ignore those.
            return

        outputs = feature_extractor("This is a test")
        shape = self.get_shape(outputs)
        self.assertEqual(shape[0], 1)

        outputs = feature_extractor(["This is a test", "Another test"])
        shape = self.get_shape(outputs)
        self.assertEqual(shape[0], 2)
@@ -30,6 +30,24 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING

    @require_torch
    def test_pt_bert_small(self):
        text_classifier = pipeline(
            task="text-classification", model="Narsil/tiny-distilbert-sequence-classification", framework="pt"
        )
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_1", "score": 0.502}])

    @require_tf
    def test_tf_bert_small(self):
        text_classifier = pipeline(
            task="text-classification", model="Narsil/tiny-distilbert-sequence-classification", framework="tf"
        )
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_1", "score": 0.502}])

    @slow
    @require_torch
    def test_pt_bert(self):