Unverified Commit ac262604 authored by Billy Cao, committed by GitHub

Allow FP16 or other precision inference for Pipelines (#31342)



* Cast image features to model.dtype where needed to support FP16 or other precision in pipelines

* Update src/transformers/pipelines/image_feature_extraction.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Use .to instead

* Add FP16 pipeline support for zeroshot audio classification

* Remove unused torch imports

* Add docs on FP16 pipeline

* Remove unused import

* Add FP16 tests to pipeline mixin

* Add fp16 placeholder for mask_generation pipeline test

* Add FP16 tests for all pipelines

* Fix formatting

* Remove torch_dtype arg from is_pipeline_test_to_skip*

* Fix format

* Trigger CI

---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent e7868444
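
For context, the user-facing behavior this PR enables: `pipeline()` forwards `torch_dtype` to `from_pretrained`, and the pipelines now cast their prepared inputs to `model.dtype` where needed, so half-precision inference works end to end. A minimal usage sketch (the checkpoint name is illustrative, not taken from this diff):

    import torch
    from transformers import pipeline

    # Load the pipeline's model in FP16; the pipeline casts prepared inputs
    # to model.dtype where needed, so the forward pass sees matching dtypes.
    classifier = pipeline(
        "image-classification",
        model="google/vit-base-patch16-224",  # illustrative checkpoint
        torch_dtype=torch.float16,
        device=0,  # FP16 is typically used on GPU
    )
    preds = classifier("http://images.cocodataset.org/val2017/000000039769.jpg")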
@@ -174,7 +174,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
             raise ValueError("We expect lists of floats, nothing else")
         return shape
 
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
         if tokenizer is None:
             self.skipTest(reason="No tokenizer")
         elif (
@@ -193,7 +193,9 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
                 For now ignore those.
                 """
             )
-        feature_extractor = FeatureExtractionPipeline(model=model, tokenizer=tokenizer, feature_extractor=processor)
+        feature_extractor = FeatureExtractionPipeline(
+            model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype
+        )
         return feature_extractor, ["This is a test", "This is another test"]
 
     def run_pipeline_test(self, feature_extractor, examples):
@@ -251,11 +251,11 @@ class FillMaskPipelineTests(unittest.TestCase):
         unmasker.tokenizer.pad_token = None
         self.run_pipeline_test(unmasker, [])
 
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
         if tokenizer is None or tokenizer.mask_token_id is None:
             self.skipTest(reason="The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
 
-        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         examples = [
             f"This is another {tokenizer.mask_token} test",
         ]
@@ -55,8 +55,10 @@ class ImageClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_classifier = ImageClassificationPipeline(model=model, image_processor=processor, top_k=2)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_classifier = ImageClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
         examples = [
             Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
             "http://images.cocodataset.org/val2017/000000039769.jpg",
@@ -157,7 +157,7 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase):
             outputs = feature_extractor(img, return_tensors=True)
             self.assertTrue(tf.is_tensor(outputs))
 
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
         if processor is None:
             self.skipTest(reason="No image processor")
 
@@ -175,7 +175,9 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase):
                 """
             )
-        feature_extractor = ImageFeatureExtractionPipeline(model=model, image_processor=processor)
+        feature_extractor = ImageFeatureExtractionPipeline(
+            model=model, image_processor=processor, torch_dtype=torch_dtype
+        )
         img = prepare_img()
         return feature_extractor, [img, img]
@@ -87,8 +87,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
         + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
     )
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
         return image_segmenter, [
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -54,9 +54,9 @@ class ImageToImagePipelineTests(unittest.TestCase):
     @require_torch
     @require_vision
     @slow
-    def test_pipeline(self):
+    def test_pipeline(self, torch_dtype="float32"):
         model_id = "caidas/swin2SR-classical-sr-x2-64"
-        upscaler = pipeline("image-to-image", model=model_id)
+        upscaler = pipeline("image-to-image", model=model_id, torch_dtype=torch_dtype)
         upscaled_list = upscaler(self.examples)
 
         self.assertEqual(len(upscaled_list), len(self.examples))
@@ -66,6 +66,12 @@ class ImageToImagePipelineTests(unittest.TestCase):
         self.assertEqual(upscaled_list[0].size, (1296, 976))
         self.assertEqual(upscaled_list[1].size, (1296, 976))
 
+    @require_torch
+    @require_vision
+    @slow
+    def test_pipeline_fp16(self):
+        self.test_pipeline(torch_dtype="float16")
+
     @require_torch
     @require_vision
     @slow
@@ -45,8 +45,10 @@ class ImageToTextPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
     tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        pipe = pipeline(
+            "image-to-text", model=model, tokenizer=tokenizer, image_processor=processor, torch_dtype=torch_dtype
+        )
         examples = [
             Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -67,8 +67,8 @@ class MaskGenerationPipelineTests(unittest.TestCase):
         (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else [])
     )
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
         return image_segmenter, [
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -53,8 +53,8 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
         return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
 
     def run_pipeline_test(self, object_detector, examples):
@@ -50,12 +50,12 @@ class QAPipelineTests(unittest.TestCase):
         config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
     }
 
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
         if isinstance(model.config, LxmertConfig):
             # This is a bimodal model, we need to find a more consistent way
             # to switch on those models.
             return None, None
-        question_answerer = QuestionAnsweringPipeline(model, tokenizer)
+        question_answerer = QuestionAnsweringPipeline(model, tokenizer, torch_dtype=torch_dtype)
 
         examples = [
             {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
@@ -32,8 +32,8 @@ class SummarizationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
 
     def run_pipeline_test(self, summarizer, _):
@@ -152,9 +152,9 @@ class TQAPipelineTests(unittest.TestCase):
     @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
     @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
         model_id = "lysandre/tiny-tapas-random-wtq"
-        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
 
         self.assertIsInstance(model.config.aggregation_labels, dict)
         self.assertIsInstance(model.config.no_aggregation_label_index, int)
@@ -255,9 +255,14 @@ class TQAPipelineTests(unittest.TestCase):
     @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
     @require_torch
-    def test_slow_tokenizer_sqa_pt(self):
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @require_torch
+    def test_slow_tokenizer_sqa_pt(self, torch_dtype="float32"):
         model_id = "lysandre/tiny-tapas-random-sqa"
-        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
 
         table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
@@ -373,6 +378,11 @@ class TQAPipelineTests(unittest.TestCase):
             },
         )
 
+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @require_torch
+    def test_slow_tokenizer_sqa_pt_fp16(self):
+        self.test_slow_tokenizer_sqa_pt(torch_dtype="float16")
+
     @require_tf
     @require_tensorflow_probability
     @require_pandas
@@ -498,8 +508,8 @@ class TQAPipelineTests(unittest.TestCase):
     @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
     @slow
     @require_torch
-    def test_integration_wtq_pt(self):
-        table_querier = pipeline("table-question-answering")
+    def test_integration_wtq_pt(self, torch_dtype="float32"):
+        table_querier = pipeline("table-question-answering", torch_dtype=torch_dtype)
 
         data = {
             "Repository": ["Transformers", "Datasets", "Tokenizers"],
@@ -541,6 +551,12 @@ class TQAPipelineTests(unittest.TestCase):
         ]
         self.assertListEqual(results, expected_results)
 
+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @slow
+    @require_torch
+    def test_integration_wtq_pt_fp16(self):
+        self.test_integration_wtq_pt(torch_dtype="float16")
+
     @slow
     @require_tensorflow_probability
     @require_pandas
@@ -593,11 +609,12 @@ class TQAPipelineTests(unittest.TestCase):
     @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
     @slow
     @require_torch
-    def test_integration_sqa_pt(self):
+    def test_integration_sqa_pt(self, torch_dtype="float32"):
         table_querier = pipeline(
             "table-question-answering",
             model="google/tapas-base-finetuned-sqa",
             tokenizer="google/tapas-base-finetuned-sqa",
+            torch_dtype=torch_dtype,
         )
         data = {
             "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
@@ -615,6 +632,12 @@ class TQAPipelineTests(unittest.TestCase):
         ]
         self.assertListEqual(results, expected_results)
 
+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @slow
+    @require_torch
+    def test_integration_sqa_pt_fp16(self):
+        self.test_integration_sqa_pt(torch_dtype="float16")
+
     @slow
     @require_tensorflow_probability
     @require_pandas
@@ -645,11 +668,12 @@ class TQAPipelineTests(unittest.TestCase):
 
     @slow
     @require_torch
-    def test_large_model_pt_tapex(self):
+    def test_large_model_pt_tapex(self, torch_dtype="float32"):
         model_id = "microsoft/tapex-large-finetuned-wtq"
         table_querier = pipeline(
             "table-question-answering",
             model=model_id,
+            torch_dtype=torch_dtype,
         )
         data = {
             "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
@@ -35,8 +35,8 @@ class Text2TextGenerationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         return generator, ["Something to write", "Something else"]
 
     def run_pipeline_test(self, generator, _):
@@ -179,8 +179,8 @@ class TextClassificationPipelineTests(unittest.TestCase):
         outputs = text_classifier("Birds are a type of animal")
         self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         return text_classifier, ["HuggingFace is in", "This is another test"]
 
     def run_pipeline_test(self, text_classifier, _):
@@ -320,8 +320,8 @@ class TextGenerationPipelineTests(unittest.TestCase):
             ],
         )
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         return text_generator, ["This is a test", "Another test"]
 
     def test_stop_sequence_stopping_criteria(self):
@@ -250,8 +250,8 @@ class TextToAudioPipelineTests(unittest.TestCase):
         outputs = music_generator("This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs)
         self.assertListEqual(outputs["audio"].tolist(), audio.tolist())
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return speech_generator, ["This is a test", "Another test"]
 
     def run_pipeline_test(self, speech_generator, _):
@@ -56,8 +56,8 @@ class TokenClassificationPipelineTests(unittest.TestCase):
         config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
     }
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
 
     def run_pipeline_test(self, token_classifier, _):
@@ -35,12 +35,14 @@ class TranslationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
         if isinstance(model.config, MBartConfig):
             src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
-            translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
+            translator = TranslationPipeline(
+                model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang, torch_dtype=torch_dtype
+            )
         else:
-            translator = TranslationPipeline(model=model, tokenizer=tokenizer)
+            translator = TranslationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
         return translator, ["Some string", "Some other text"]
 
     def run_pipeline_test(self, translator, _):
@@ -38,11 +38,13 @@ from .test_pipelines_common import ANY
 class VideoClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
         example_video_filepath = hf_hub_download(
             repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
         )
-        video_classifier = VideoClassificationPipeline(model=model, image_processor=processor, top_k=2)
+        video_classifier = VideoClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
         examples = [
             example_video_filepath,
             "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
@@ -55,8 +55,10 @@ else:
 class VisualQuestionAnsweringPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING
 
-    def get_test_pipeline(self, model, tokenizer, processor):
-        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        vqa_pipeline = pipeline(
+            "visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa", torch_dtype=torch_dtype
+        )
         examples = [
             {
                 "image": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
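
The hunks above cover only the test changes; the pipeline-side fix the commit message describes ("Cast image features to model.dtype ... Use .to instead") amounts to casting the processed inputs before the forward pass. A minimal sketch of that pattern, assuming a PyTorch pipeline whose preprocess step returns a BatchFeature (names are illustrative, not the exact upstream code):

    # Inside a pipeline's preprocess step, after the image processor has run:
    model_inputs = self.image_processor(images=image, return_tensors="pt")
    if self.framework == "pt":
        # BatchFeature.to() casts floating-point tensors (e.g. pixel_values),
        # so an FP16-loaded model receives FP16 inputs instead of FP32.
        model_inputs = model_inputs.to(self.torch_dtype)
    return model_inputs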