"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b87eb82b4f6766719323558c905a3122b41b047b"
Unverified Commit 874ac129 authored by Sangbum Daniel Choi, committed by GitHub
Browse files

fix the get_size_with_aspect_ratio in max_size situation (#30902)



* fix the get_size_with_aspect_ratio in max_size situation

* make fix-up

* add more general solution

* consider when max_size is not defined

* fix typo

* fix typo

* simple fix

* fix error

* fix if else error

* fix error of size overwrite

* fix yolos image processing

* fix detr image processing

* make

* add longest related test script

* Update src/transformers/models/yolos/image_processing_yolos.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* add more test

* add test script about longest size

* remove deprecated

---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent e4628434
...@@ -100,21 +100,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in ...@@ -100,21 +100,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size. The maximum allowed output size.
""" """
height, width = image_size height, width = image_size
raw_size = None
if max_size is not None: if max_size is not None:
min_original_size = float(min((height, width))) min_original_size = float(min((height, width)))
max_original_size = float(max((height, width))) max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size): if (height <= width and height == size) or (width <= height and width == size):
return height, width oh, ow = height, width
elif width < height:
if width < height:
ow = size ow = size
oh = int(size * height / width) if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else: else:
oh = size oh = size
ow = int(size * width / height) if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow) return (oh, ow)
......
...@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in ...@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size. The maximum allowed output size.
""" """
height, width = image_size height, width = image_size
raw_size = None
if max_size is not None: if max_size is not None:
min_original_size = float(min((height, width))) min_original_size = float(min((height, width)))
max_original_size = float(max((height, width))) max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size): if (height <= width and height == size) or (width <= height and width == size):
return height, width oh, ow = height, width
elif width < height:
if width < height:
ow = size ow = size
oh = int(size * height / width) if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else: else:
oh = size oh = size
ow = int(size * width / height) if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow) return (oh, ow)
......
...@@ -91,21 +91,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in ...@@ -91,21 +91,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size. The maximum allowed output size.
""" """
height, width = image_size height, width = image_size
raw_size = None
if max_size is not None: if max_size is not None:
min_original_size = float(min((height, width))) min_original_size = float(min((height, width)))
max_original_size = float(max((height, width))) max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size): if (height <= width and height == size) or (width <= height and width == size):
return height, width oh, ow = height, width
elif width < height:
if width < height:
ow = size ow = size
oh = int(size * height / width) if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else: else:
oh = size oh = size
ow = int(size * width / height) if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow) return (oh, ow)
......
...@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in ...@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size. The maximum allowed output size.
""" """
height, width = image_size height, width = image_size
raw_size = None
if max_size is not None: if max_size is not None:
min_original_size = float(min((height, width))) min_original_size = float(min((height, width)))
max_original_size = float(max((height, width))) max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size): if (height <= width and height == size) or (width <= height and width == size):
return height, width oh, ow = height, width
elif width < height:
if width < height:
ow = size ow = size
oh = int(size * height / width) if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else: else:
oh = size oh = size
ow = int(size * width / height) if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow) return (oh, ow)
......
...@@ -105,21 +105,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in ...@@ -105,21 +105,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size. The maximum allowed output size.
""" """
height, width = image_size height, width = image_size
raw_size = None
if max_size is not None: if max_size is not None:
min_original_size = float(min((height, width))) min_original_size = float(min((height, width)))
max_original_size = float(max((height, width))) max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size): if (height <= width and height == size) or (width <= height and width == size):
return height, width oh, ow = height, width
elif width < height:
if width < height:
ow = size ow = size
oh = int(size * height / width) if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else: else:
oh = size oh = size
ow = int(size * width / height) if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow) return (oh, ow)
......
...@@ -101,9 +101,11 @@ def get_max_height_width( ...@@ -101,9 +101,11 @@ def get_max_height_width(
return (max_height, max_width) return (max_height, max_width)
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]: def get_size_with_aspect_ratio(
image_size: Tuple[int, int], size: int, max_size: Optional[int] = None, mod_size: int = 16
) -> Tuple[int, int]:
""" """
Computes the output image size given the input image size and the desired output size. Computes the output image size given the input image size and the desired output size, rounded down to a multiple of `mod_size`.
Args: Args:
image_size (`Tuple[int, int]`): image_size (`Tuple[int, int]`):
...@@ -112,25 +114,40 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in ...@@ -112,25 +114,40 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The desired output size. The desired output size.
max_size (`int`, *optional*): max_size (`int`, *optional*):
The maximum allowed output size. The maximum allowed output size.
mod_size (`int`, *optional*):
The size to make multiple of mod_size.
""" """
height, width = image_size height, width = image_size
raw_size = None
if max_size is not None: if max_size is not None:
min_original_size = float(min((height, width))) min_original_size = float(min((height, width)))
max_original_size = float(max((height, width))) max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if width <= height and width != size:
height = int(size * height / width) if width < height:
width = size ow = size
elif height < width and height != size: if max_size is not None and raw_size is not None:
width = int(size * width / height) oh = int(raw_size * height / width)
height = size else:
width_mod = np.mod(width, 16) oh = int(size * height / width)
height_mod = np.mod(height, 16) elif (height <= width and height == size) or (width <= height and width == size):
width = width - width_mod oh, ow = height, width
height = height - height_mod else:
return (height, width) oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
if mod_size is not None:
ow_mod = np.mod(ow, mod_size)
oh_mod = np.mod(oh, mod_size)
ow = ow - ow_mod
oh = oh - oh_mod
return (oh, ow)
# Copied from transformers.models.detr.image_processing_detr.get_image_size_for_max_height_width # Copied from transformers.models.detr.image_processing_detr.get_image_size_for_max_height_width
......
...@@ -537,3 +537,55 @@ class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcess ...@@ -537,3 +537,55 @@ class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcess
) )
inputs = image_processor(images=[image_1, image_2], return_tensors="pt") inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100])) self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
    """Check resized shapes when `size` gives both `shortest_edge` and `longest_edge`.

    Every case runs with `do_pad=False`. Input tensors are built as
    (height, width, channels) and the asserted `pixel_values` shape is
    (batch, channels, height, width).
    """
    # Each entry: (input image shape, `size` argument, expected pixel_values shape).
    cases = [
        # width < height; 958x653 -> 640x436
        ([958, 653, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 640, 436]),
        # height < width; 653x958 -> 436x640
        ([653, 958, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 436, 640]),
        # height == shortest_edge, width (120) > longest_edge (118); 100x120 -> 98x118
        ([100, 120, 3], {"longest_edge": 118, "shortest_edge": 100}, [1, 3, 98, 118]),
        # width == shortest_edge, height (128) < longest_edge (256); 128x50 stays 128x50
        ([128, 50, 3], {"longest_edge": 256, "shortest_edge": 50}, [1, 3, 128, 50]),
        # height == width == shortest_edge, below longest_edge; 50x50 stays 50x50
        ([50, 50, 3], {"longest_edge": 117, "shortest_edge": 50}, [1, 3, 50, 50]),
    ]

    for input_shape, size, expected_shape in cases:
        image = torch.ones(input_shape, dtype=torch.uint8)
        image_processor = ConditionalDetrImageProcessor(size=size, do_pad=False)
        inputs = image_processor(images=[image], return_tensors="pt")
        self.assertEqual(inputs["pixel_values"].shape, torch.Size(expected_shape))
...@@ -539,3 +539,55 @@ class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessi ...@@ -539,3 +539,55 @@ class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessi
) )
inputs = image_processor(images=[image_1, image_2], return_tensors="pt") inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100])) self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
    """Check resized shapes when `size` gives both `shortest_edge` and `longest_edge`.

    Every case runs with `do_pad=False`. Input tensors are built as
    (height, width, channels) and the asserted `pixel_values` shape is
    (batch, channels, height, width).
    """
    # Each entry: (input image shape, `size` argument, expected pixel_values shape).
    cases = [
        # width < height; 958x653 -> 640x436
        ([958, 653, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 640, 436]),
        # height < width; 653x958 -> 436x640
        ([653, 958, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 436, 640]),
        # height == shortest_edge, width (120) > longest_edge (118); 100x120 -> 98x118
        ([100, 120, 3], {"longest_edge": 118, "shortest_edge": 100}, [1, 3, 98, 118]),
        # width == shortest_edge, height (128) < longest_edge (256); 128x50 stays 128x50
        ([128, 50, 3], {"longest_edge": 256, "shortest_edge": 50}, [1, 3, 128, 50]),
        # height == width == shortest_edge, below longest_edge; 50x50 stays 50x50
        ([50, 50, 3], {"longest_edge": 117, "shortest_edge": 50}, [1, 3, 50, 50]),
    ]

    for input_shape, size, expected_shape in cases:
        image = torch.ones(input_shape, dtype=torch.uint8)
        image_processor = DeformableDetrImageProcessor(size=size, do_pad=False)
        inputs = image_processor(images=[image], return_tensors="pt")
        self.assertEqual(inputs["pixel_values"].shape, torch.Size(expected_shape))
...@@ -593,3 +593,55 @@ class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixi ...@@ -593,3 +593,55 @@ class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixi
) )
inputs = image_processor(images=[image_1, image_2], return_tensors="pt") inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100])) self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
    """Check resized shapes when `size` gives both `shortest_edge` and `longest_edge`.

    Every case runs with `do_pad=False`. Input tensors are built as
    (height, width, channels) and the asserted `pixel_values` shape is
    (batch, channels, height, width).
    """
    # Each entry: (input image shape, `size` argument, expected pixel_values shape).
    cases = [
        # width < height; 958x653 -> 640x436
        ([958, 653, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 640, 436]),
        # height < width; 653x958 -> 436x640
        ([653, 958, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 436, 640]),
        # height == shortest_edge, width (120) > longest_edge (118); 100x120 -> 98x118
        ([100, 120, 3], {"longest_edge": 118, "shortest_edge": 100}, [1, 3, 98, 118]),
        # width == shortest_edge, height (128) < longest_edge (256); 128x50 stays 128x50
        ([128, 50, 3], {"longest_edge": 256, "shortest_edge": 50}, [1, 3, 128, 50]),
        # height == width == shortest_edge, below longest_edge; 50x50 stays 50x50
        ([50, 50, 3], {"longest_edge": 117, "shortest_edge": 50}, [1, 3, 50, 50]),
    ]

    for input_shape, size, expected_shape in cases:
        image = torch.ones(input_shape, dtype=torch.uint8)
        image_processor = DetrImageProcessor(size=size, do_pad=False)
        inputs = image_processor(images=[image], return_tensors="pt")
        self.assertEqual(inputs["pixel_values"].shape, torch.Size(expected_shape))
...@@ -575,3 +575,55 @@ class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessin ...@@ -575,3 +575,55 @@ class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessin
) )
inputs = image_processor(images=[image_1, image_2], return_tensors="pt") inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100])) self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
    """Check resized shapes when `size` gives both `shortest_edge` and `longest_edge`.

    Every case runs with `do_pad=False`. Input tensors are built as
    (height, width, channels) and the asserted `pixel_values` shape is
    (batch, channels, height, width).
    """
    # Each entry: (input image shape, `size` argument, expected pixel_values shape).
    cases = [
        # width < height; 958x653 -> 640x436
        ([958, 653, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 640, 436]),
        # height < width; 653x958 -> 436x640
        ([653, 958, 3], {"longest_edge": 640, "shortest_edge": 640}, [1, 3, 436, 640]),
        # height == shortest_edge, width (120) > longest_edge (118); 100x120 -> 98x118
        ([100, 120, 3], {"longest_edge": 118, "shortest_edge": 100}, [1, 3, 98, 118]),
        # width == shortest_edge, height (128) < longest_edge (256); 128x50 stays 128x50
        ([128, 50, 3], {"longest_edge": 256, "shortest_edge": 50}, [1, 3, 128, 50]),
        # height == width == shortest_edge, below longest_edge; 50x50 stays 50x50
        ([50, 50, 3], {"longest_edge": 117, "shortest_edge": 50}, [1, 3, 50, 50]),
    ]

    for input_shape, size, expected_shape in cases:
        image = torch.ones(input_shape, dtype=torch.uint8)
        image_processor = GroundingDinoImageProcessor(size=size, do_pad=False)
        inputs = image_processor(images=[image], return_tensors="pt")
        self.assertEqual(inputs["pixel_values"].shape, torch.Size(expected_shape))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment