Allow FP16 or other precision inference for Pipelines (#31342)

* cast image features to model.dtype where needed to support FP16 or other precision in pipelines
* Update src/transformers/pipelines/image_feature_extraction.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Use .to instead
* Add FP16 pipeline support for zeroshot audio classification
* Remove unused torch imports
* Add docs on FP16 pipeline
* Remove unused import
* Add FP16 tests to pipeline mixin
* Add fp16 placeholder for mask_generation pipeline test
* Add FP16 tests for all pipelines
* Fix formatting
* Remove torch_dtype arg from is_pipeline_test_to_skip*
* Fix format
* trigger ci
---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

Allow FP16 or other precision inference for Pipelines (#31342)
* cast image features to model.dtype where needed to support FP16 or other precision in pipelines
* Update src/transformers/pipelines/image_feature_extraction.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Use .to instead
* Add FP16 pipeline support for zeroshot audio classification
* Remove unused torch imports
* Add docs on FP16 pipeline
* Remove unused import
* Add FP16 tests to pipeline mixin
* Add fp16 placeholder for mask_generation pipeline test
* Add FP16 tests for all pipelines
* Fix formatting
* Remove torch_dtype arg from is_pipeline_test_to_skip*
* Fix format
* trigger ci
---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
ac262604 · aliencaocao · GitHub · e7868444 · ac262604 · ac262604
Unverified Commit ac262604 authored Jul 06, 2024 by aliencaocao Committed by GitHub Jul 05, 2024
5 changed files
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -42,9 +42,9 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        classifier = ZeroShotClassificationPipeline(
-            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
+            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"], torch_dtype=torch_dtype
        )
        return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]


--- a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
@@ -28,9 +28,11 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
    # model_mapping = {CLAPConfig: CLAPModel}

    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        audio_classifier = pipeline(
-            task="zero-shot-audio-classification", model="hf-internal-testing/tiny-clap-htsat-unfused"
+            task="zero-shot-audio-classification",
+            model="hf-internal-testing/tiny-clap-htsat-unfused",
+            torch_dtype=torch_dtype,
        )
        dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
        audio = dataset["train"]["audio"][-1]["array"]
@@ -40,6 +42,10 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
            [{"score": 0.501, "label": "Sound of a dog"}, {"score": 0.499, "label": "Sound of vaccum cleaner"}],
        )

+    @require_torch
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
    @unittest.skip(reason="No models are available in TF")
    def test_small_model_tf(self):
        pass

--- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py
@@ -71,9 +71,9 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
    #     outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"])

    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        image_classifier = pipeline(
-            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
+            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", torch_dtype=torch_dtype
        )
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(image, candidate_labels=["a", "b", "c"])
@@ -127,6 +127,10 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
            ],
        )

+    @require_torch
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
    @require_tf
    def test_small_model_tf(self):
        image_classifier = pipeline(

--- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py
+++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py
@@ -43,9 +43,11 @@ else:
 class ZeroShotObjectDetectionPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        object_detector = pipeline(
-            "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
+            "zero-shot-object-detection",
+            model="hf-internal-testing/tiny-random-owlvit-object-detection",
+            torch_dtype=torch_dtype,
        )

        examples = [

--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -126,16 +126,18 @@ class PipelineTesterMixin:
    pipeline_model_mapping = None
    supported_frameworks = ["pt", "tf"]

-    def run_task_tests(self, task):
+    def run_task_tests(self, task, torch_dtype="float32"):
        """Run pipeline tests for a specific `task`

        Args:
            task (`str`):
                A task name. This should be a key in the mapping `pipeline_test_mapping`.
+            torch_dtype (`str`, `optional`, defaults to `'float32'`):
+                The torch dtype to use for the model. Can be used for FP16/other precision inference.
        """
        if task not in self.pipeline_model_mapping:
            self.skipTest(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: `{task}` is not in "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: `{task}` is not in "
                f"`self.pipeline_model_mapping` for `{self.__class__.__name__}`."
            )

@@ -171,10 +173,12 @@ class PipelineTesterMixin:
                repo_name = model_arch_name

            self.run_model_pipeline_tests(
-                task, repo_name, model_architecture, tokenizer_names, processor_names, commit
+                task, repo_name, model_architecture, tokenizer_names, processor_names, commit, torch_dtype
            )

-    def run_model_pipeline_tests(self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit):
+    def run_model_pipeline_tests(
+        self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit, torch_dtype="float32"
+    ):
        """Run pipeline tests for a specific `task` with the give model class and tokenizer/processor class names

        Args:
@@ -188,6 +192,10 @@ class PipelineTesterMixin:
                A list of names of a subclasses of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`.
            processor_names (`List[str]`):
                A list of names of subclasses of `BaseImageProcessor` or `FeatureExtractionMixin`.
+            commit (`str`):
+                The commit hash of the model repository on the Hub.
+            torch_dtype (`str`, `optional`, defaults to `'float32'`):
+                The torch dtype to use for the model. Can be used for FP16/other precision inference.
        """
        # Get an instance of the corresponding class `XXXPipelineTests` in order to use `get_test_pipeline` and
        # `run_pipeline_test`.
@@ -203,14 +211,18 @@ class PipelineTesterMixin:
                    processor_name,
                ):
                    logger.warning(
-                        f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
+                        f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: test is "
                        f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                        f"`{tokenizer_name}` | processor `{processor_name}`."
                    )
                    continue
-                self.run_pipeline_test(task, repo_name, model_architecture, tokenizer_name, processor_name, commit)
+                self.run_pipeline_test(
+                    task, repo_name, model_architecture, tokenizer_name, processor_name, commit, torch_dtype
+                )

-    def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit):
+    def run_pipeline_test(
+        self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit, torch_dtype="float32"
+    ):
        """Run pipeline tests for a specific `task` with the give model class and tokenizer/processor class name

        The model will be loaded from a model repository on the Hub.
@@ -226,6 +238,10 @@ class PipelineTesterMixin:
                The name of a subclass of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`.
            processor_name (`str`):
                The name of a subclass of `BaseImageProcessor` or `FeatureExtractionMixin`.
+            commit (`str`):
+                The commit hash of the model repository on the Hub.
+            torch_dtype (`str`, `optional`, defaults to `'float32'`):
+                The torch dtype to use for the model. Can be used for FP16/other precision inference.
        """
        repo_id = f"{TRANSFORMERS_TINY_MODEL_PATH}/{repo_name}"
        if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
@@ -245,7 +261,7 @@ class PipelineTesterMixin:
                processor = processor_class.from_pretrained(repo_id, revision=commit)
            except Exception:
                logger.warning(
-                    f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not load the "
+                    f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not load the "
                    f"processor from `{repo_id}` with `{processor_name}`."
                )
                self.skipTest(f"Could not load the processor from {repo_id} with {processor_name}.")
@@ -253,7 +269,7 @@ class PipelineTesterMixin:
        # TODO: Maybe not upload such problematic tiny models to Hub.
        if tokenizer is None and processor is None:
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not find or load "
                f"any tokenizer / processor from `{repo_id}`."
            )
            self.skipTest(f"Could not find or load any tokenizer / processor from {repo_id}.")
@@ -263,7 +279,7 @@ class PipelineTesterMixin:
            model = model_architecture.from_pretrained(repo_id, revision=commit)
        except Exception:
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not find or load "
                f"the model from `{repo_id}` with `{model_architecture}`."
            )
            self.skipTest(f"Could not find or load the model from {repo_id} with {model_architecture}.")
@@ -271,7 +287,7 @@ class PipelineTesterMixin:
        pipeline_test_class_name = pipeline_test_mapping[task]["test"].__name__
        if self.is_pipeline_test_to_skip_more(pipeline_test_class_name, model.config, model, tokenizer, processor):
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: test is "
                f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                f"`{tokenizer_name}` | processor `{processor_name}`."
            )
@@ -289,12 +305,12 @@ class PipelineTesterMixin:
        # `run_pipeline_test`.
        task_test = pipeline_test_mapping[task]["test"]()

-        pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor)
+        pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor, torch_dtype=torch_dtype)
        if pipeline is None:
            # The test can disable itself, but it should be very marginal
            # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not get the "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not get the "
                "pipeline for testing."
            )
            self.skipTest(reason="Could not get the pipeline for testing.")
@@ -324,10 +340,20 @@ class PipelineTesterMixin:
    def test_pipeline_audio_classification(self):
        self.run_task_tests(task="audio-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_audio_classification_fp16(self):
+        self.run_task_tests(task="audio-classification", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_automatic_speech_recognition(self):
        self.run_task_tests(task="automatic-speech-recognition")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_automatic_speech_recognition_fp16(self):
+        self.run_task_tests(task="automatic-speech-recognition", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_timm
@@ -335,6 +361,13 @@ class PipelineTesterMixin:
    def test_pipeline_depth_estimation(self):
        self.run_task_tests(task="depth-estimation")

+    @is_pipeline_test
+    @require_vision
+    @require_timm
+    @require_torch
+    def test_pipeline_depth_estimation_fp16(self):
+        self.run_task_tests(task="depth-estimation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_pytesseract
    @require_torch
@@ -342,20 +375,43 @@ class PipelineTesterMixin:
    def test_pipeline_document_question_answering(self):
        self.run_task_tests(task="document-question-answering")

+    @is_pipeline_test
+    @require_pytesseract
+    @require_torch
+    @require_vision
+    def test_pipeline_document_question_answering_fp16(self):
+        self.run_task_tests(task="document-question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_feature_extraction(self):
        self.run_task_tests(task="feature-extraction")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_feature_extraction_fp16(self):
+        self.run_task_tests(task="feature-extraction", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_fill_mask(self):
        self.run_task_tests(task="fill-mask")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_fill_mask_fp16(self):
+        self.run_task_tests(task="fill-mask", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
    def test_pipeline_image_classification(self):
        self.run_task_tests(task="image-classification")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_image_classification_fp16(self):
+        self.run_task_tests(task="image-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_timm
@@ -363,11 +419,24 @@ class PipelineTesterMixin:
    def test_pipeline_image_segmentation(self):
        self.run_task_tests(task="image-segmentation")

+    @is_pipeline_test
+    @require_vision
+    @require_timm
+    @require_torch
+    def test_pipeline_image_segmentation_fp16(self):
+        self.run_task_tests(task="image-segmentation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    def test_pipeline_image_to_text(self):
        self.run_task_tests(task="image-to-text")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_image_to_text_fp16(self):
+        self.run_task_tests(task="image-to-text", torch_dtype="float16")
+
    @is_pipeline_test
    @require_timm
    @require_vision
@@ -375,6 +444,13 @@ class PipelineTesterMixin:
    def test_pipeline_image_feature_extraction(self):
        self.run_task_tests(task="image-feature-extraction")

+    @is_pipeline_test
+    @require_timm
+    @require_vision
+    @require_torch
+    def test_pipeline_image_feature_extraction_fp16(self):
+        self.run_task_tests(task="image-feature-extraction", torch_dtype="float16")
+
    @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
    @is_pipeline_test
    @require_vision
@@ -382,6 +458,13 @@ class PipelineTesterMixin:
    def test_pipeline_mask_generation(self):
        self.run_task_tests(task="mask-generation")

+    @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_mask_generation_fp16(self):
+        self.run_task_tests(task="mask-generation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_timm
@@ -389,44 +472,96 @@ class PipelineTesterMixin:
    def test_pipeline_object_detection(self):
        self.run_task_tests(task="object-detection")

+    @is_pipeline_test
+    @require_vision
+    @require_timm
+    @require_torch
+    def test_pipeline_object_detection_fp16(self):
+        self.run_task_tests(task="object-detection", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_question_answering(self):
        self.run_task_tests(task="question-answering")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_question_answering_fp16(self):
+        self.run_task_tests(task="question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_summarization(self):
        self.run_task_tests(task="summarization")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_summarization_fp16(self):
+        self.run_task_tests(task="summarization", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_table_question_answering(self):
        self.run_task_tests(task="table-question-answering")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_table_question_answering_fp16(self):
+        self.run_task_tests(task="table-question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_text2text_generation(self):
        self.run_task_tests(task="text2text-generation")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text2text_generation_fp16(self):
+        self.run_task_tests(task="text2text-generation", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_text_classification(self):
        self.run_task_tests(task="text-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text_classification_fp16(self):
+        self.run_task_tests(task="text-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch_or_tf
    def test_pipeline_text_generation(self):
        self.run_task_tests(task="text-generation")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text_generation_fp16(self):
+        self.run_task_tests(task="text-generation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch
    def test_pipeline_text_to_audio(self):
        self.run_task_tests(task="text-to-audio")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text_to_audio_fp16(self):
+        self.run_task_tests(task="text-to-audio", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_token_classification(self):
        self.run_task_tests(task="token-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_token_classification_fp16(self):
+        self.run_task_tests(task="token-classification", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_translation(self):
        self.run_task_tests(task="translation")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_translation_fp16(self):
+        self.run_task_tests(task="translation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
@@ -434,32 +569,67 @@ class PipelineTesterMixin:
    def test_pipeline_video_classification(self):
        self.run_task_tests(task="video-classification")

+    @is_pipeline_test
+    @require_vision
+    @require_decord
+    @require_torch
+    def test_pipeline_video_classification_fp16(self):
+        self.run_task_tests(task="video-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch
    @require_vision
    def test_pipeline_visual_question_answering(self):
        self.run_task_tests(task="visual-question-answering")

+    @is_pipeline_test
+    @require_torch
+    @require_vision
+    def test_pipeline_visual_question_answering_fp16(self):
+        self.run_task_tests(task="visual-question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_zero_shot(self):
        self.run_task_tests(task="zero-shot")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_zero_shot_fp16(self):
+        self.run_task_tests(task="zero-shot", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch
    def test_pipeline_zero_shot_audio_classification(self):
        self.run_task_tests(task="zero-shot-audio-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_zero_shot_audio_classification_fp16(self):
+        self.run_task_tests(task="zero-shot-audio-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    def test_pipeline_zero_shot_image_classification(self):
        self.run_task_tests(task="zero-shot-image-classification")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_zero_shot_image_classification_fp16(self):
+        self.run_task_tests(task="zero-shot-image-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_torch
    def test_pipeline_zero_shot_object_detection(self):
        self.run_task_tests(task="zero-shot-object-detection")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_zero_shot_object_detection_fp16(self):
+        self.run_task_tests(task="zero-shot-object-detection", torch_dtype="float16")
+
    # This contains the test cases to be skipped without model architecture being involved.
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name