Treat tags in python tasks the same as yaml tasks (#2288)

* Treat python tasks same as yaml tasks.
* Add tests.
* Re-add fixture decorators.
* Fix typing specification error for Python 3.9.

Treat tags in python tasks the same as yaml tasks (#2288)
* Treat python tasks same as yaml tasks.
* Add tests.
* Re-add fixture decorators.
* Fix typing specification error for Python 3.9.
b2bf7bc4 · Giulio Lovisotto · GitHub · 72d619ff · b2bf7bc4 · b2bf7bc4
Unverified Commit b2bf7bc4 authored Sep 26, 2024 by Giulio Lovisotto Committed by GitHub Sep 26, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 123 additions and 37 deletions

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +50 -37

tests/test_task_manager.py tests/test_task_manager.py +73 -0

No files found.
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -40,7 +40,11 @@ class TaskManager:
            [x for x in self._all_tasks if self._task_index[x]["type"] == "group"]
        )
        self._all_subtasks = sorted(
-            [x for x in self._all_tasks if self._task_index[x]["type"] == "task"]
+            [
+                x
+                for x in self._all_tasks
+                if self._task_index[x]["type"] in ["task", "python_task"]
+            ]
        )
        self._all_tags = sorted(
            [x for x in self._all_tasks if self._task_index[x]["type"] == "tag"]
@@ -436,6 +440,43 @@ class TaskManager:
        :return
            Dictionary of task names as key and task metadata
        """
+
+        def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
+            """
+            Register `task` under every tag (or deprecated `group`) listed in `config`.
+
+            :param config: dict
+                Task config; its optional `tag` / `group` entries may be a single
+                string or a list of strings.
+            :param task: str
+                Name of the task being indexed.
+            :param tasks_and_groups: dict
+                Index being built; it is updated in place.
+            :param print_info: bool
+                Whether the `group` deprecation notice may be logged at all.
+            """
+            # TODO: remove group in next release
+            for attr in ["tag", "group"]:
+                if attr in config:
+                    # Rebinding the `print_info` parameter would be invisible to the
+                    # caller, so the "already logged" state is kept on the helper object
+                    # itself. The helper is re-created each time the enclosing method
+                    # runs, so the notice is logged at most once per run instead of once
+                    # per task config.
+                    if (
+                        attr == "group"
+                        and print_info
+                        and not getattr(
+                            _populate_tags_and_groups, "_group_notice_logged", False
+                        )
+                    ):
+                        self.logger.info(
+                            "`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
+                            "The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
+                            "`group`s which aggregate across subtasks must be only defined in a separate group config file, "
+                            "which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
+                            "Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
+                            "for more information."
+                        )
+                        _populate_tags_and_groups._group_notice_logged = True
+                        # attr = "tag"
+
+                    # A bare string is shorthand for a one-element list.
+                    attr_list = config[attr]
+                    if isinstance(attr_list, str):
+                        attr_list = [attr_list]
+
+                    for tag in attr_list:
+                        if tag not in tasks_and_groups:
+                            tasks_and_groups[tag] = {
+                                "type": "tag",
+                                "task": [task],
+                                "yaml_path": -1,
+                            }
+                        elif tasks_and_groups[tag]["type"] != "tag":
+                            self.logger.info(
+                                f"The tag {tag} is already registered as a group, this tag will not be registered. "
+                                "This may affect tasks you want to call."
+                            )
+                            # NOTE(review): `break` also skips the remaining entries of
+                            # `attr_list`; confirm this is intended rather than `continue`.
+                            break
+                        else:
+                            tasks_and_groups[tag]["task"].append(task)
+
        # TODO: remove group in next release
        print_info = True
        ignore_dirs = [
@@ -451,10 +492,14 @@ class TaskManager:
                    config = utils.load_yaml_config(yaml_path, mode="simple")
                    if self._config_is_python_task(config):
                        # This is a python class config
-                        tasks_and_groups[config["task"]] = {
+                        task = config["task"]
+                        tasks_and_groups[task] = {
                            "type": "python_task",
                            "yaml_path": yaml_path,
                        }
+                        _populate_tags_and_groups(
+                            config, task, tasks_and_groups, print_info
+                        )
                    elif self._config_is_group(config):
                        # This is a group config
                        tasks_and_groups[config["group"]] = {
@@ -483,41 +528,9 @@ class TaskManager:
                            "type": "task",
                            "yaml_path": yaml_path,
                        }
-
-                        # TODO: remove group in next release
-                        for attr in ["tag", "group"]:
-                            if attr in config:
-                                if attr == "group" and print_info:
-                                    self.logger.info(
-                                        "`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
-                                        "The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
-                                        "`group`s which aggregate across subtasks must be only defined in a separate group config file, "
-                                        "which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
-                                        "Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
-                                        "for more information."
-                                    )
-                                    print_info = False
-                                    # attr = "tag"
-
-                                attr_list = config[attr]
-                                if isinstance(attr_list, str):
-                                    attr_list = [attr_list]
-
-                                for tag in attr_list:
-                                    if tag not in tasks_and_groups:
-                                        tasks_and_groups[tag] = {
-                                            "type": "tag",
-                                            "task": [task],
-                                            "yaml_path": -1,
-                                        }
-                                    elif tasks_and_groups[tag]["type"] != "tag":
-                                        self.logger.info(
-                                            f"The tag {tag} is already registered as a group, this tag will not be registered. "
-                                            "This may affect tasks you want to call."
-                                        )
-                                        break
-                                    else:
-                                        tasks_and_groups[tag]["task"].append(task)
+                        _populate_tags_and_groups(
+                            config, task, tasks_and_groups, print_info
+                        )
                    else:
                        self.logger.debug(f"File {f} in {root} could not be loaded")


--- a/tests/test_task_manager.py
+++ b/tests/test_task_manager.py
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from lm_eval.tasks import TaskManager
+
+
+@pytest.fixture(scope="module")
+def custom_task_name():
+    # Name used for the mock task's `task:` entry and for its YAML/Python file stems.
+    return "zzz_my_python_task"
+
+
+@pytest.fixture(scope="module")
+def custom_task_tag():
+    # Tag listed under `tag:` in the mock task's YAML config.
+    return "zzz-tag"
+
+
+@pytest.fixture(scope="module")
+def task_yaml(pytestconfig, custom_task_name, custom_task_tag):
+    # YAML text for the mock python task: it includes the hellaswag config, names the
+    # task, points `class` at MockPythonTask and attaches the custom tag.
+    # There is no teardown, so a plain `return` is used like the sibling fixtures.
+    return f"""include: {pytestconfig.rootpath}/lm_eval/tasks/hellaswag/hellaswag.yaml
+task: {custom_task_name}
+class: !function {custom_task_name}.MockPythonTask
+tag:
+  - {custom_task_tag}
+"""
+
+
+@pytest.fixture(scope="module")
+def task_code():
+    # Python source for the mock task module: a ConfigurableTask subclass that drops
+    # the `class` key from its config before delegating to the parent constructor.
+    return """
+from lm_eval.tasks import ConfigurableTask
+
+class MockPythonTask(ConfigurableTask):
+
+    def __init__(
+        self,
+        data_dir=None,
+        cache_dir=None,
+        download_mode=None,
+        config=None,
+    ) -> None:
+        config.pop("class")
+        super().__init__(data_dir, cache_dir, download_mode, config)
+"""
+
+
+@pytest.fixture(scope="module")
+def custom_task_files_dir(task_yaml, task_code, custom_task_name):
+    """Yield a temporary directory holding the mock task's YAML config and Python module.
+
+    Both files are named after `custom_task_name`. The directory is removed when the
+    `TemporaryDirectory` context exits, after the fixture is torn down.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Yield a Path so the value matches the `Path` annotation used by the test.
+        task_dir = Path(temp_dir)
+        # An explicit encoding keeps the written files independent of the locale default.
+        (task_dir / f"{custom_task_name}.yaml").write_text(task_yaml, encoding="utf-8")
+        (task_dir / f"{custom_task_name}.py").write_text(task_code, encoding="utf-8")
+        yield task_dir
+
+
+def test_python_task_inclusion(
+    custom_task_files_dir: Path, custom_task_name: str, custom_task_tag: str
+):
+    """A python task found via `include_path` is indexed, listed and loadable by tag."""
+    include_dir = str(custom_task_files_dir)
+    manager = TaskManager(verbosity="INFO", include_path=include_dir)
+
+    # The python task must appear in the global task index ...
+    assert custom_task_name in manager.task_index
+    # ... and be listed among the subtasks.
+    assert custom_task_name in manager.all_subtasks
+    # Its tag must be registered ...
+    assert custom_task_tag in manager.all_tags
+    # ... and loading by that tag must yield the task.
+    loaded_by_tag = manager.load_task_or_group(custom_task_tag)
+    assert custom_task_name in loaded_by_tag