Unverified Commit 250ae9f7 authored by Pablo Montalvo, committed by GitHub

Paligemma - fix slow tests, add bf16 and f16 slow tests (#30851)

* fix slow tests, add bf16 and f16 slow tests

* few fixes

* [run-slow]paligemma

* add gate decorator

* [run-slow]paligemma

* add missing gating

* [run-slow]paligemma

* [run-slow]paligemma
parent ada86f97
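
At a glance, the change repoints the integration tests at the released, gated google/paligemma-3b-pt-224 checkpoint (gated access is enforced with @require_read_token instead of @require_bitsandbytes) and adds batched bf16 and f16 slow tests that load the checkpoint from its dtype-specific revision. Below is a minimal standalone sketch of the bf16 path the new tests exercise, assuming access to the gated checkpoint and a CUDA device; the f16 variant only swaps in revision="float16" and torch.float16. This is illustrative only, not part of the diff.

# Sketch only: mirrors test_small_model_integration_test_paligemma_batched_bf16 in the diff,
# assuming a Hugging Face read token for the gated checkpoint and an available CUDA device.
import requests
import torch
from PIL import Image

from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor

model_id = "google/paligemma-3b-pt-224"
processor = PaliGemmaProcessor.from_pretrained(model_id)
# bf16 weights are pulled from the repo's dedicated "bfloat16" revision.
model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id, revision="bfloat16", torch_dtype=torch.bfloat16
).to("cuda")

url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
image = Image.open(requests.get(url, stream=True).raw)

# Cast the floating-point inputs (pixel values) to the model dtype and move them to the GPU,
# as the tests do with .to(torch.bfloat16).to(torch_device).
inputs = (
    processor(text="answer en Where is the cow standing?", images=image, return_tensors="pt")
    .to(torch.bfloat16)
    .to("cuda")
)
output = model.generate(**inputs, max_new_tokens=20)
print(processor.decode(output[0], skip_special_tokens=True))  # the tests expect the answer "beach"
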
@@ -28,7 +28,7 @@ from transformers import (
     is_vision_available,
 )
 from transformers.testing_utils import (
-    require_bitsandbytes,
+    require_read_token,
     require_torch,
     require_torch_sdpa,
     slow,
@@ -260,60 +260,32 @@ class PaliGemmaForConditionalGenerationModelTest(ModelTesterMixin, unittest.Test
 @slow
 @require_torch
+@require_read_token
 class PaliGemmaForConditionalGenerationIntegrationTest(unittest.TestCase):
     def setUp(self):
-        self.processor = PaliGemmaProcessor.from_pretrained("gv-hf/PaliGemma-test-224px-hf")
+        self.processor = PaliGemmaProcessor.from_pretrained("google/paligemma-3b-pt-224")
 
     def tearDown(self):
         gc.collect()
         torch.cuda.empty_cache()
 
     @slow
-    @require_bitsandbytes
+    @require_read_token
     def test_small_model_integration_test(self):
         # Let' s make sure we test the preprocessing to replace what is used
-        model = PaliGemmaForConditionalGeneration.from_pretrained("gv-hf/PaliGemma-test-224px-hf")
+        model_id = "google/paligemma-3b-pt-224"
+        model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
         prompt = ""
         image_file = (
             "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
         )
         raw_image = Image.open(requests.get(image_file, stream=True).raw)
         inputs = self.processor(text=prompt, images=raw_image, return_tensors="pt")
-        # fmt: off
-        EXPECTED_INPUT_IDS = torch.tensor([[256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000, 256000,
-            256000, 256000, 256000, 256000, 2, 108]])
-        # fmt: on
+        EXPECTED_INPUT_IDS = torch.tensor([[257152] * 256 + [2, 108]])
         self.assertTrue(torch.equal(inputs["input_ids"], EXPECTED_INPUT_IDS))
         output = model.generate(**inputs, max_new_tokens=20)
-        EXPECTED_DECODED_TEXT = "\ncow standing on the beach"  # fmt: skip
+        EXPECTED_DECODED_TEXT = "\ncow on the beach"  # fmt: skip
         self.assertEqual(
             self.processor.decode(output[0], skip_special_tokens=True),
@@ -321,37 +293,55 @@ class PaliGemmaForConditionalGenerationIntegrationTest(unittest.TestCase):
         )
 
     @slow
-    @require_bitsandbytes
-    def test_small_model_integration_test_paligemma(self):
+    @require_read_token
+    def test_small_model_integration_test_paligemma_VQA(self):
         # Let' s make sure we test the preprocessing to replace what is used
-        model_id = "gv-hf/PaliGemma-test-224px-hf"
-        model = PaliGemmaForConditionalGeneration.from_pretrained("gv-hf/PaliGemma-test-224px-hf")
-        processor = PaliGemmaProcessor.from_pretrained(model_id)
+        model_id = "google/paligemma-3b-pt-224"
+        model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
         prompt = "answer en Where is the cow standing?"
         image_file = (
             "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
         )
         raw_image = Image.open(requests.get(image_file, stream=True).raw)
-        inputs = processor(text=prompt, images=raw_image, return_tensors="pt").to(torch.float16)
+        inputs = self.processor(text=prompt, images=raw_image, return_tensors="pt").to(torch.float16)
         output = model.generate(**inputs, max_new_tokens=900, do_sample=False)
         EXPECTED_DECODED_TEXT = "answer en Where is the cow standing?\nbeach"  # fmt: skip
         self.assertEqual(
-            processor.decode(output[0], skip_special_tokens=True),
+            self.processor.decode(output[0], skip_special_tokens=True),
+            EXPECTED_DECODED_TEXT,
+        )
+
+    @slow
+    @require_read_token
+    def test_small_model_integration_test_paligemma_empty_prompt(self):
+        # Let' s make sure we test the preprocessing to replace what is used
+        model_id = "google/paligemma-3b-pt-224"
+        model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
+        prompt = ""
+        image_file = (
+            "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
+        )
+        raw_image = Image.open(requests.get(image_file, stream=True).raw)
+        inputs = self.processor(text=prompt, images=raw_image, return_tensors="pt").to(torch.float16)
+        output = model.generate(**inputs, max_new_tokens=900, do_sample=False)
+        EXPECTED_DECODED_TEXT = "\ncow on the beach"  # fmt: skip
+        self.assertEqual(
+            self.processor.decode(output[0], skip_special_tokens=True),
             EXPECTED_DECODED_TEXT,
         )
 
     @slow
-    @require_bitsandbytes
+    @require_read_token
     def test_small_model_integration_test_paligemma_batched(self):
         # Let' s make sure we test the preprocessing to replace what is used
-        model_id = "gv-hf/PaliGemma-test-224px-hf"
+        model_id = "google/paligemma-3b-pt-224"
         model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
-        processor = PaliGemmaProcessor.from_pretrained(model_id)
 
         prompts = [
             "answer en Where is the cow standing?",
@@ -365,19 +355,23 @@ class PaliGemmaForConditionalGenerationIntegrationTest(unittest.TestCase):
         )
         image2 = image1
-        inputs = processor(text=prompts, images=[image1, image2], return_tensors="pt", padding=True)
+        inputs = self.processor(text=prompts, images=[image1, image2], return_tensors="pt", padding=True)
         output = model.generate(**inputs, max_new_tokens=20)
-        EXPECTED_DECODED_TEXT = ["answer en Where is the cow standing?\nbeach", "\ncow standing on the beach"]  # fmt: skip
+        EXPECTED_DECODED_TEXT = ["answer en Where is the cow standing?\nbeach", "\ncow on the beach"]  # fmt: skip
-        self.assertEqual(processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
+        self.assertEqual(self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
 
     @slow
-    @require_bitsandbytes
-    def test_small_model_integration_test_batch(self):
+    @require_torch
+    @require_read_token
+    def test_small_model_integration_test_paligemma_batched_bf16(self):
         # Let' s make sure we test the preprocessing to replace what is used
-        model = PaliGemmaForConditionalGeneration.from_pretrained("gv-hf/PaliGemma-test-224px-hf")
+        model_id = "google/paligemma-3b-pt-224"
+        model = PaliGemmaForConditionalGeneration.from_pretrained(
+            model_id, revision="bfloat16", torch_dtype=torch.bfloat16
+        ).to(torch_device)
         # The first batch is longer in terms of text, the second will be padded.
         prompts = [
             "answer en Where is the cow standing?",
@@ -391,24 +385,58 @@ class PaliGemmaForConditionalGenerationIntegrationTest(unittest.TestCase):
         )
         image2 = image1
-        inputs = self.processor(text=prompts, images=[image1, image2], return_tensors="pt", padding=True)
+        inputs = (
+            self.processor(text=prompts, images=[image1, image2], return_tensors="pt", padding=True)
+            .to(torch.bfloat16)
+            .to(torch_device)
+        )
+        output = model.generate(**inputs, max_new_tokens=20)
+        EXPECTED_DECODED_TEXT = ["answer en Where is the cow standing?\nbeach", "\ncow on the beach"]  # fmt: skip
+        self.assertEqual(self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
+
+    @slow
+    @require_torch
+    @require_read_token
+    def test_small_model_integration_test_paligemma_batched_f16(self):
+        # Let' s make sure we test the preprocessing to replace what is used
+        model_id = "google/paligemma-3b-pt-224"
+        model = PaliGemmaForConditionalGeneration.from_pretrained(
+            model_id, revision="float16", torch_dtype=torch.float16
+        ).to(torch_device)
+        # The first batch is longer in terms of text, the second will be padded.
+        prompts = [
+            "answer en Where is the cow standing?",
+            "",
+        ]
+        image1 = Image.open(
+            requests.get(
+                "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png",
+                stream=True,
+            ).raw
+        )
+        image2 = image1
+        inputs = (
+            self.processor(text=prompts, images=[image1, image2], return_tensors="pt", padding=True)
+            .to(torch.float16)
+            .to(torch_device)
+        )
         output = model.generate(**inputs, max_new_tokens=20)
-        EXPECTED_DECODED_TEXT = ["answer en Where is the cow standing?\nbeach", "\ncow standing on the beach"]  # fmt: skip
+        EXPECTED_DECODED_TEXT = ["answer en Where is the cow standing?\nbeach", "\ncow on the beach"]  # fmt: skip
         self.assertEqual(self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
 
     @slow
-    @require_bitsandbytes
+    @require_read_token
    def test_paligemma_index_error_bug(self):
         # This is a reproducer of https://github.com/huggingface/transformers/pull/28032 and makes sure it does not happen anymore
         # Please refer to that PR, or specifically https://github.com/huggingface/transformers/pull/28032#issuecomment-1860650043 for
         # more details
-        model_id = "gv-hf/PaliGemma-test-224px-hf"
+        model_id = "google/paligemma-3b-pt-224"
         model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
-        processor = PaliGemmaProcessor.from_pretrained(model_id)
         # Simulate a super long prompt
         prompt = "\n" * 200
         image_file = (
@@ -416,7 +444,7 @@ class PaliGemmaForConditionalGenerationIntegrationTest(unittest.TestCase):
         )
         raw_image = Image.open(requests.get(image_file, stream=True).raw)
-        inputs = processor(
+        inputs = self.processor(
             text=prompt,
             images=raw_image,
             return_tensors="pt",