Unverified commit abd25310, authored by Shauray Singh, committed by GitHub

Fix padding for IDEFICS (#26396)

* fix

* fixup

* tests

* fixup
parent 408b2b3c
@@ -280,7 +280,7 @@ class IdeficsProcessor(ProcessorMixin):
             else:
                 return fake_token + image_token + fake_token
 
-        all_texts = []
+        all_prompts = []
         all_images = []
         for sample in prompts:
             # the model was trained on samples starting with <s>
@@ -321,17 +321,18 @@ class IdeficsProcessor(ProcessorMixin):
             image_objects = self.image_processor(image_objects, transform=transform)
 
-            text_encoding = self.tokenizer(
-                text=full_text,
-                add_special_tokens=False,
-                padding=padding,
-                truncation=truncation,
-                max_length=max_length,
-            )
-            all_texts.append(text_encoding["input_ids"])
+            all_prompts.append(full_text)
             all_images.append(image_objects)
 
+        text_encoding = self.tokenizer(
+            text=all_prompts,
+            add_special_tokens=False,
+            padding=padding,
+            truncation=truncation,
+            max_length=max_length,
+        )
+        all_texts = text_encoding["input_ids"]
+
         max_seq_len = max(len(x) for x in all_texts)
         # max_num_images has to be at least 1 even when there are no images
...
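Why batching the tokenizer call fixes the padding: when each prompt is tokenized on its own, `padding="longest"` has nothing to pad against (every call sees a batch of one), so the returned batch comes back ragged. Tokenizing all prompts in one call lets the tokenizer pad relative to the whole batch. The sketch below is illustrative only and not part of the diff; it assumes `hf-internal-testing/llama-tokenizer` as a stand-in checkpoint for the real IDEFICS tokenizer (any tokenizer with a pad token would do).

```python
# Minimal sketch of the bug this commit fixes; the checkpoint is a stand-in,
# not the IDEFICS tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
tokenizer.pad_token = tokenizer.unk_token  # mirror IDEFICS, which pads with <unk>

texts = ["a short prompt", "a noticeably longer prompt with many more tokens"]

# Before the fix: one tokenizer call per sample. padding="longest" is a no-op
# here, since each call only ever sees a single sequence.
per_sample = [tokenizer(t, padding="longest")["input_ids"] for t in texts]
assert len(per_sample[0]) != len(per_sample[1])  # ragged batch

# After the fix: one batched call, so every sequence is padded out to the
# longest prompt in the batch.
batched = tokenizer(texts, padding="longest")["input_ids"]
assert len(batched[0]) == len(batched[1])  # uniform batch
```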
@@ -141,6 +141,25 @@ class IdeficsProcessorTest(TestCasePlus):
         self.assertListEqual(decoded_tok, decoded_processor)
 
+    def test_tokenizer_padding(self):
+        image_processor = self.get_image_processor()
+        tokenizer = self.get_tokenizer(padding_side="right")
+        processor = IdeficsProcessor(tokenizer=tokenizer, image_processor=image_processor)
+
+        predicted_tokens = [
+            "<s>Describe this image.\nAssistant:<unk><unk><unk><unk><unk><unk><unk><unk><unk>",
+            "<s>Describe this image.\nAssistant:<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>",
+        ]
+        prompts = [[prompt] for prompt in self.prepare_prompts()[2]]
+
+        max_length = processor(prompts, padding="max_length", truncation=True, max_length=20)
+        longest = processor(prompts, padding="longest", truncation=True, max_length=30)
+
+        decoded_max_length = processor.tokenizer.decode(max_length["input_ids"][-1])
+        decoded_longest = processor.tokenizer.decode(longest["input_ids"][-1])
+
+        self.assertEqual(decoded_max_length, predicted_tokens[1])
+        self.assertEqual(decoded_longest, predicted_tokens[0])
+
     def test_model_input_names(self):
         image_processor = self.get_image_processor()
         tokenizer = self.get_tokenizer()
...
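For context on the expected strings in the test: `padding="max_length"` pads every sequence out to exactly `max_length` tokens, while `padding="longest"` pads only up to the longest prompt in the batch, which is why the two decoded outputs differ by one `<unk>`. A rough sketch of that difference, under the same stand-in-tokenizer assumption as above; the prompts and lengths are illustrative, not the real IDEFICS test inputs.

```python
# Illustrative only; the checkpoint is a stand-in for the real IDEFICS tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
tokenizer.pad_token = tokenizer.unk_token  # mirror IDEFICS, which pads with <unk>

texts = ["Describe this image.\nAssistant:", "Describe this other image.\nAssistant:"]

enc_max = tokenizer(texts, padding="max_length", truncation=True, max_length=20)
enc_longest = tokenizer(texts, padding="longest", truncation=True, max_length=30)

# padding="max_length" always yields exactly max_length tokens per sequence.
assert all(len(ids) == 20 for ids in enc_max["input_ids"])

# padding="longest" only pads up to the longest sequence in the batch.
longest_len = max(len(tokenizer(t)["input_ids"]) for t in texts)
assert all(len(ids) == longest_len for ids in enc_longest["input_ids"])
```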