Fix resizing bug in OWL-ViT (#18573)

* Fixes resizing bug in OWL-ViT
* Defaults to square resize if size is set to an int
* Sets do_center_crop default value to False

Fix resizing bug in OWL-ViT (#18573)
* Fixes resizing bug in OWL-ViT
* Defaults to square resize if size is set to an int
* Sets do_center_crop default value to False
f762f373 · Alara Dirik · GitHub · 76568d24 · f762f373
Unverified Commit f762f373 authored Aug 11, 2022 by Alara Dirik Committed by GitHub Aug 11, 2022
Show whitespace changes
Inline Side-by-side

Showing with 8 additions and 6 deletions

src/transformers/models/owlvit/feature_extraction_owlvit.py src/transformers/models/owlvit/feature_extraction_owlvit.py +8 -6

No files found.
--- a/src/transformers/models/owlvit/feature_extraction_owlvit.py
+++ b/src/transformers/models/owlvit/feature_extraction_owlvit.py
@@ -50,13 +50,15 @@ class OwlViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin
    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the shorter edge of the input to a certain `size`.
-        size (`int`, *optional*, defaults to 768):
+        size (`int` or `Tuple[int, int]`, *optional*, defaults to (768, 768)):
-            Resize the shorter edge of the input to the given size. Only has an effect if `do_resize` is set to `True`.
+            The size to use for resizing the image. Only has an effect if `do_resize` is set to `True`. If `size` is a
+            sequence like (h, w), output size will be matched to this. If `size` is an int, then image will be resized
+            to (size, size).
        resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
            if `do_resize` is set to `True`.
-        do_center_crop (`bool`, *optional*, defaults to `True`):
+        do_center_crop (`bool`, *optional*, defaults to `False`):
            Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
            image is padded with 0's and then center cropped.
        crop_size (`int`, *optional*, defaults to 768):
@@ -74,10 +76,10 @@ class OwlViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin
    def __init__(
        self,
        do_resize=True,
-        size=768,
+        size=(768, 768),
        resample=Image.BICUBIC,
        crop_size=768,
-        do_center_crop=True,
+        do_center_crop=False,
        do_normalize=True,
        image_mean=None,
        image_std=None,
@@ -195,7 +197,7 @@ class OwlViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin
        # transformations (resizing + center cropping + normalization)
        if self.do_resize and self.size is not None and self.resample is not None:
            images = [
-                self.resize(image=image, size=self.size, resample=self.resample, default_to_square=False)
+                self.resize(image=image, size=self.size, resample=self.resample, default_to_square=True)
                for image in images
            ]
        if self.do_center_crop and self.crop_size is not None: