Return input_ids in ImageGPT feature extractor (#16872)

cb555af2 · Sylvain Gugger · GitHub · e789418e · cb555af2 · cb555af2
Unverified commit cb555af2, authored Apr 21, 2022 by Sylvain Gugger; committed by GitHub on Apr 21, 2022.
2 changed files
--- a/src/transformers/models/imagegpt/feature_extraction_imagegpt.py
+++ b/src/transformers/models/imagegpt/feature_extraction_imagegpt.py
@@ -68,7 +68,7 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
            Whether or not to normalize the input to the range between -1 and +1.
    """
-    model_input_names = ["pixel_values"]
+    model_input_names = ["input_ids"]
    def __init__(self, clusters, do_resize=True, size=32, resample=Image.BILINEAR, do_normalize=True, **kwargs):
        super().__init__(**kwargs)
@@ -128,8 +128,7 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:
-            - **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
+            - **input_ids** -- Input IDs to be fed to a model, of shape `(batch_size, height * width)`.
-              width).
        """
        # Input type checking for clearer error
        valid_images = False
@@ -171,7 +170,7 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
        images = images.reshape(batch_size, -1)
        # return as BatchFeature
-        data = {"pixel_values": images}
+        data = {"input_ids": images}
        encoded_inputs = BatchFeature(data=data, tensor_type=return_tensors)
        return encoded_inputs
--- a/tests/imagegpt/test_feature_extraction_imagegpt.py
+++ b/tests/imagegpt/test_feature_extraction_imagegpt.py
@@ -161,17 +161,17 @@ class ImageGPTFeatureExtractorIntegrationTest(unittest.TestCase):
        # test non-batched
        encoding = feature_extractor(images[0], return_tensors="pt")
-        self.assertIsInstance(encoding.pixel_values, torch.LongTensor)
+        self.assertIsInstance(encoding.input_ids, torch.LongTensor)
-        self.assertEqual(encoding.pixel_values.shape, (1, 1024))
+        self.assertEqual(encoding.input_ids.shape, (1, 1024))
        expected_slice = [306, 191, 191]
-        self.assertEqual(encoding.pixel_values[0, :3].tolist(), expected_slice)
+        self.assertEqual(encoding.input_ids[0, :3].tolist(), expected_slice)
        # test batched
        encoding = feature_extractor(images, return_tensors="pt")
-        self.assertIsInstance(encoding.pixel_values, torch.LongTensor)
+        self.assertIsInstance(encoding.input_ids, torch.LongTensor)
-        self.assertEqual(encoding.pixel_values.shape, (2, 1024))
+        self.assertEqual(encoding.input_ids.shape, (2, 1024))
        expected_slice = [303, 13, 13]
-        self.assertEqual(encoding.pixel_values[1, -3:].tolist(), expected_slice)
+        self.assertEqual(encoding.input_ids[1, -3:].tolist(), expected_slice)