Unverified commit 2576a8cb, authored by Hailey Schoelkopf and committed by GitHub
Browse files

Fix Llava-1.5-hf ; Update to version 0.4.5 (#2388)

parent 1ed1f9ed
...@@ -57,7 +57,6 @@ class TaskConfig(dict): ...@@ -57,7 +57,6 @@ class TaskConfig(dict):
task: Optional[str] = None task: Optional[str] = None
task_alias: Optional[str] = None task_alias: Optional[str] = None
tag: Optional[Union[str, list]] = None tag: Optional[Union[str, list]] = None
group: Optional[Union[str, list]] = None
# HF dataset options. # HF dataset options.
# which dataset to use, # which dataset to use,
# and what splits for what purpose # and what splits for what purpose
...@@ -98,18 +97,6 @@ class TaskConfig(dict): ...@@ -98,18 +97,6 @@ class TaskConfig(dict):
) )
def __post_init__(self) -> None: def __post_init__(self) -> None:
if self.group is not None:
eval_logger.warning(
"A task YAML file was found to contain a `group` key. Groups which provide aggregate scores over several subtasks now require a separate config file--if not aggregating, you may want to use the `tag` config option instead within your config. Setting `group` within a TaskConfig will be deprecated in v0.4.4. Please see https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md for more information."
)
if self.tag is None:
self.tag = self.group
else:
raise ValueError(
"Got both a `group` and `tag` entry within a TaskConfig. Please use one or the other--`group` values will be deprecated in v0.4.4."
)
if self.generation_kwargs is not None: if self.generation_kwargs is not None:
if self.output_type != "generate_until": if self.output_type != "generate_until":
eval_logger.warning( eval_logger.warning(
......
...@@ -13,6 +13,7 @@ from lm_eval.api.registry import register_model ...@@ -13,6 +13,7 @@ from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM from lm_eval.models.huggingface import HFLM
from lm_eval.models.utils import ( from lm_eval.models.utils import (
Collator, Collator,
flatten_image_list,
pad_and_concat, pad_and_concat,
replace_placeholders, replace_placeholders,
stop_sequences_criteria, stop_sequences_criteria,
...@@ -293,6 +294,10 @@ class HFMultimodalLM(HFLM): ...@@ -293,6 +294,10 @@ class HFMultimodalLM(HFLM):
if self.rgb: if self.rgb:
images = [[img.convert("RGB") for img in sublist] for sublist in images] images = [[img.convert("RGB") for img in sublist] for sublist in images]
# certain models like llava expect a single-level image list even for bs>1, multi-image. TODO: port this over to loglikelihoods
if getattr(self.config, "model_type", "") == "llava":
images = flatten_image_list(images)
encoding = self.processor( encoding = self.processor(
images=images, images=images,
text=strings, text=strings,
......
...@@ -698,3 +698,14 @@ def replace_placeholders( ...@@ -698,3 +698,14 @@ def replace_placeholders(
# Add the last part of the string # Add the last part of the string
result.append(parts[-1]) result.append(parts[-1])
return "".join(result) return "".join(result)
def flatten_image_list(images: List[List]) -> List:
    """
    Flatten a list of image lists into a single list, preserving order.

    Used for some multimodal models like Llava-1.5, which expect this flattened-list
    format for their image processor.

    :param images: A list of lists of PIL images.
    :return: A list of PIL images, formed by concatenating all the sub-lists in order.
        Empty sub-lists contribute nothing; an empty outer list yields an empty list.
    """
    # One pass, outer order first and then inner order, so images stay in their original sequence.
    return [image for image_list in images for image in image_list]
...@@ -443,39 +443,26 @@ class TaskManager: ...@@ -443,39 +443,26 @@ class TaskManager:
def _populate_tags_and_groups(config, task, tasks_and_groups, print_info): def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
# TODO: remove group in next release # TODO: remove group in next release
for attr in ["tag", "group"]: if "tag" in config:
if attr in config: attr_list = config["tag"]
if attr == "group" and print_info: if isinstance(attr_list, str):
attr_list = [attr_list]
for tag in attr_list:
if tag not in tasks_and_groups:
tasks_and_groups[tag] = {
"type": "tag",
"task": [task],
"yaml_path": -1,
}
elif tasks_and_groups[tag]["type"] != "tag":
self.logger.info( self.logger.info(
"`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. " f"The tag '{tag}' is already registered as a group, this tag will not be registered. "
"The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. " "This may affect tasks you want to call."
"`group`s which aggregate across subtasks must be only defined in a separate group config file, "
"which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
"Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
"for more information."
) )
print_info = False break
# attr = "tag" else:
tasks_and_groups[tag]["task"].append(task)
attr_list = config[attr]
if isinstance(attr_list, str):
attr_list = [attr_list]
for tag in attr_list:
if tag not in tasks_and_groups:
tasks_and_groups[tag] = {
"type": "tag",
"task": [task],
"yaml_path": -1,
}
elif tasks_and_groups[tag]["type"] != "tag":
self.logger.info(
f"The tag {tag} is already registered as a group, this tag will not be registered. "
"This may affect tasks you want to call."
)
break
else:
tasks_and_groups[tag]["task"].append(task)
# TODO: remove group in next release # TODO: remove group in next release
print_info = True print_info = True
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment