Fix Llava-1.5-hf ; Update to version 0.4.5 (#2388)

2576a8cb · Hailey Schoelkopf · GitHub · 1ed1f9ed · 2576a8cb · 2576a8cb
Unverified Commit 2576a8cb authored Oct 08, 2024 by Hailey Schoelkopf Committed by GitHub Oct 08, 2024
4 changed files
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -57,7 +57,6 @@ class TaskConfig(dict):
    task: Optional[str] = None
    task_alias: Optional[str] = None
    tag: Optional[Union[str, list]] = None
-    group: Optional[Union[str, list]] = None
    # HF dataset options.
    # which dataset to use,
    # and what splits for what purpose
@@ -98,18 +97,6 @@ class TaskConfig(dict):
    )
    def __post_init__(self) -> None:
-        if self.group is not None:
-            eval_logger.warning(
-                "A task YAML file was found to contain a `group` key. Groups which provide aggregate scores over several subtasks now require a separate config file--if not aggregating, you may want to use the `tag` config option instead within your config. Setting `group` within a TaskConfig will be deprecated in v0.4.4. Please see https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md for more information."
-            )
-            if self.tag is None:
-                self.tag = self.group
-            else:
-                raise ValueError(
-                    "Got both a `group` and `tag` entry within a TaskConfig. Please use one or the other--`group` values will be deprecated in v0.4.4."
-                )
        if self.generation_kwargs is not None:
            if self.output_type != "generate_until":
                eval_logger.warning(

--- a/lm_eval/models/hf_vlms.py
+++ b/lm_eval/models/hf_vlms.py
@@ -13,6 +13,7 @@ from lm_eval.api.registry import register_model
 from lm_eval.models.huggingface import HFLM
 from lm_eval.models.utils import (
    Collator,
+    flatten_image_list,
    pad_and_concat,
    replace_placeholders,
    stop_sequences_criteria,
@@ -293,6 +294,10 @@ class HFMultimodalLM(HFLM):
        if self.rgb:
            images = [[img.convert("RGB") for img in sublist] for sublist in images]
+        # Certain models (e.g. LLaVA) expect a single flat list of images, even when batch size > 1 or a request contains multiple images. TODO: port this over to loglikelihoods
+        if getattr(self.config, "model_type", "") == "llava":
+            images = flatten_image_list(images)
        encoding = self.processor(
            images=images,
            text=strings,

--- a/lm_eval/models/utils.py
+++ b/lm_eval/models/utils.py
@@ -698,3 +698,14 @@ def replace_placeholders(
    # Add the last part of the string
    result.append(parts[-1])
    return "".join(result)
+def flatten_image_list(images: List[List]):
+    """
+    Takes in a list of lists of images, and returns a single flat list of all images, preserving their order.
+    Used for some multimodal models, such as Llava-1.5, whose image processors expect this flattened-list format.
+    :param images: A list of lists of PIL images.
+    :return: A list of PIL images, formed by concatenating all the sub-lists in order.
+    """
+    return [image for image_list in images for image in image_list]
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -443,39 +443,26 @@ class TaskManager:
        def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
            # TODO: remove group in next release
-            for attr in ["tag", "group"]:
+            if "tag" in config:
-                if attr in config:
+                attr_list = config["tag"]
-                    if attr == "group" and print_info:
+                if isinstance(attr_list, str):
+                    attr_list = [attr_list]
+                for tag in attr_list:
+                    if tag not in tasks_and_groups:
+                        tasks_and_groups[tag] = {
+                            "type": "tag",
+                            "task": [task],
+                            "yaml_path": -1,
+                        }
+                    elif tasks_and_groups[tag]["type"] != "tag":
                        self.logger.info(
-                            "`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
+                            f"The tag '{tag}' is already registered as a group, this tag will not be registered. "
-                            "The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
+                            "This may affect tasks you want to call."
-                            "`group`s which aggregate across subtasks must be only defined in a separate group config file, "
-                            "which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
-                            "Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
-                            "for more information."
                        )
-                        print_info = False
+                        break
-                        # attr = "tag"
+                    else:
+                        tasks_and_groups[tag]["task"].append(task)
-                    attr_list = config[attr]
-                    if isinstance(attr_list, str):
-                        attr_list = [attr_list]
-                    for tag in attr_list:
-                        if tag not in tasks_and_groups:
-                            tasks_and_groups[tag] = {
-                                "type": "tag",
-                                "task": [task],
-                                "yaml_path": -1,
-                            }
-                        elif tasks_and_groups[tag]["type"] != "tag":
-                            self.logger.info(
-                                f"The tag {tag} is already registered as a group, this tag will not be registered. "
-                                "This may affect tasks you want to call."
-                            )
-                            break
-                        else:
-                            tasks_and_groups[tag]["task"].append(task)
        # TODO: remove group in next release
        print_info = True