Commit 1c242485 authored by Baber
Browse files

pad images if exception

parent 6dc55fb3
...@@ -17,6 +17,7 @@ from lm_eval.models.utils import ( ...@@ -17,6 +17,7 @@ from lm_eval.models.utils import (
replace_placeholders, replace_placeholders,
stop_sequences_criteria, stop_sequences_criteria,
) )
from lm_eval.utils import add_padding_if_needed
DEFAULT_IMAGE_PLACEHOLDER = "<image>" DEFAULT_IMAGE_PLACEHOLDER = "<image>"
...@@ -266,7 +267,9 @@ class HFMultimodalLM(HFLM): ...@@ -266,7 +267,9 @@ class HFMultimodalLM(HFLM):
def tok_batch_multimodal_encode( def tok_batch_multimodal_encode(
self, self,
strings: List[str], # note that input signature of this fn is different strings: List[str], # note that input signature of this fn is different
images: List[List], # TODO: images are pil.Image at the moment, update typehint images: List[
List["PIL.Image.Image"] # noqa: F821
], # TODO: images are pil.Image at the moment, update typehint
padding_side: str = "left", padding_side: str = "left",
left_truncate_len: int = None, left_truncate_len: int = None,
truncation: bool = False, truncation: bool = False,
...@@ -292,15 +295,25 @@ class HFMultimodalLM(HFLM): ...@@ -292,15 +295,25 @@ class HFMultimodalLM(HFLM):
images = [img[: self.max_images] for img in images] images = [img[: self.max_images] for img in images]
if self.rgb: if self.rgb:
images = [[img.convert("RGB") for img in sublist] for sublist in images] images = [[img.convert("RGB") for img in sublist] for sublist in images]
try:
encoding = self.processor( encoding = self.processor(
images=images, images=images,
text=strings, text=strings,
truncation=truncation, truncation=truncation,
padding="longest", padding="longest",
return_tensors="pt", return_tensors="pt",
# **add_special_tokens, # TODO: at least some Processors error out when passing this. How do we control whether text gets BOS added? # **add_special_tokens, # TODO: at least some Processors error out when passing this. How do we control whether text gets BOS added?
) )
# Qwen processor errors out if a dimension is too small (defaults to do_resize=True, and that requires a min dimension)
except Exception:
encoding = self.processor(
images=[add_padding_if_needed(image) for image in images],
text=strings,
truncation=truncation,
padding="longest",
return_tensors="pt",
# **add_special_tokens, # TODO: at least some Processors error out when passing this. How do we control whether text gets BOS added?
)
encoding.to( # TODO: our other tokenization methods in HFLM don't typically move to device. this breaks convention encoding.to( # TODO: our other tokenization methods in HFLM don't typically move to device. this breaks convention
self.device, self.model.dtype self.device, self.model.dtype
......
...@@ -499,3 +499,40 @@ def weighted_f1_score(items): ...@@ -499,3 +499,40 @@ def weighted_f1_score(items):
preds = unzipped_list[1] preds = unzipped_list[1]
fscore = f1_score(golds, preds, average="weighted") fscore = f1_score(golds, preds, average="weighted")
return fscore return fscore
def add_padding_if_needed(
    images: List["PIL.Image.Image"],  # noqa: F821
    min_width: int = 50,
    min_height: int = 50,
    color=(255, 255, 255),
) -> List["PIL.Image.Image"]:  # noqa: F821
    """Pad images so each is at least ``min_width`` x ``min_height``.

    Images that already satisfy both minimums are passed through untouched.
    Smaller images are converted to RGB and surrounded by a border of
    ``color`` (white by default); the border is split as evenly as possible,
    with any odd pixel going to the right/bottom edge.

    Args:
        images: Images to check; each must expose ``.size`` as (width, height).
        min_width: Minimum width every returned image must have.
        min_height: Minimum height every returned image must have.
        color: Fill colour passed to ``ImageOps.expand`` for the border.

    Returns:
        A new list with one entry per input image, in the same order.
    """
    res = []
    for image in images:
        width, height = image.size
        if width >= min_width and height >= min_height:
            # Already large enough: keep it and move on. (Returning here would
            # hand back a single image and drop the rest of the batch.)
            res.append(image)
            continue

        # Imported lazily so Pillow is only required when padding is needed.
        from PIL import ImageOps

        image = image.convert("RGB")
        delta_width = max(width, min_width) - width
        delta_height = max(height, min_height) - height
        padding_left = delta_width // 2
        padding_right = delta_width - padding_left
        padding_top = delta_height // 2
        padding_bottom = delta_height - padding_top
        res.append(
            ImageOps.expand(
                image,
                (padding_left, padding_top, padding_right, padding_bottom),
                fill=color,
            )
        )
    return res
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment