Unverified commit b2bf7bc4, authored by Giulio Lovisotto and committed by GitHub
Browse files

Treat tags in python tasks the same as yaml tasks (#2288)

* Treat python tasks same as yaml tasks.

* Add tests.

* Re-add fixture decorators.

* Fix typing specification error for Python 3.9.
parent 72d619ff
......@@ -40,7 +40,11 @@ class TaskManager:
[x for x in self._all_tasks if self._task_index[x]["type"] == "group"]
)
self._all_subtasks = sorted(
[x for x in self._all_tasks if self._task_index[x]["type"] == "task"]
[
x
for x in self._all_tasks
if self._task_index[x]["type"] in ["task", "python_task"]
]
)
self._all_tags = sorted(
[x for x in self._all_tasks if self._task_index[x]["type"] == "tag"]
......@@ -436,6 +440,43 @@ class TaskManager:
:return
Dictionary of task names as key and task metadata
"""
def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
    """Register ``task`` under every tag (or legacy ``group``) named in ``config``.

    This is a closure helper: it logs through ``self.logger``, where ``self``
    is taken from the enclosing scope rather than passed in.

    :param config: dict
        Task config. Its optional ``tag`` and ``group`` entries may each be a
        single string or a list of strings.
    :param task: str
        Name of the task to attach to each tag.
    :param tasks_and_groups: dict
        Index being built. It is updated in place: unknown tags get a new
        ``{"type": "tag", "task": [...], "yaml_path": -1}`` entry, known tags
        get ``task`` appended to their ``"task"`` list.
    :param print_info: bool
        When False, the ``group`` deprecation notice is never logged.
    """
    # TODO: remove group in next release
    for attr in ["tag", "group"]:
        if attr not in config:
            continue

        # Rebinding the `print_info` parameter would be invisible to the
        # caller, so the "already logged" state is kept on the function object
        # instead. NOTE(review): this assumes the helper is re-defined on each
        # call of the enclosing method, which resets the state per indexing
        # pass -- confirm against the enclosing method.
        notice_logged = getattr(
            _populate_tags_and_groups, "_group_notice_logged", False
        )
        if attr == "group" and print_info and not notice_logged:
            self.logger.info(
                "`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
                "The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
                "`group`s which aggregate across subtasks must be only defined in a separate group config file, "
                "which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
                "Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
                "for more information."
            )
            _populate_tags_and_groups._group_notice_logged = True

        # Legacy `group` values are registered exactly like tags.
        attr_list = config[attr]
        if isinstance(attr_list, str):
            attr_list = [attr_list]

        for tag in attr_list:
            if tag not in tasks_and_groups:
                tasks_and_groups[tag] = {
                    "type": "tag",
                    "task": [task],
                    "yaml_path": -1,
                }
            elif tasks_and_groups[tag]["type"] != "tag":
                self.logger.info(
                    f"The tag {tag} is already registered as a group, this tag will not be registered. "
                    "This may affect tasks you want to call."
                )
                # Existing behaviour: a name clash stops processing of the
                # remaining names listed under this attribute as well.
                break
            else:
                tasks_and_groups[tag]["task"].append(task)
# TODO: remove group in next release
print_info = True
ignore_dirs = [
......@@ -451,10 +492,14 @@ class TaskManager:
config = utils.load_yaml_config(yaml_path, mode="simple")
if self._config_is_python_task(config):
# This is a python class config
tasks_and_groups[config["task"]] = {
task = config["task"]
tasks_and_groups[task] = {
"type": "python_task",
"yaml_path": yaml_path,
}
_populate_tags_and_groups(
config, task, tasks_and_groups, print_info
)
elif self._config_is_group(config):
# This is a group config
tasks_and_groups[config["group"]] = {
......@@ -483,41 +528,9 @@ class TaskManager:
"type": "task",
"yaml_path": yaml_path,
}
# TODO: remove group in next release
for attr in ["tag", "group"]:
if attr in config:
if attr == "group" and print_info:
self.logger.info(
"`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
"The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
"`group`s which aggregate across subtasks must be only defined in a separate group config file, "
"which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
"Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
"for more information."
)
print_info = False
# attr = "tag"
attr_list = config[attr]
if isinstance(attr_list, str):
attr_list = [attr_list]
for tag in attr_list:
if tag not in tasks_and_groups:
tasks_and_groups[tag] = {
"type": "tag",
"task": [task],
"yaml_path": -1,
}
elif tasks_and_groups[tag]["type"] != "tag":
self.logger.info(
f"The tag {tag} is already registered as a group, this tag will not be registered. "
"This may affect tasks you want to call."
)
break
else:
tasks_and_groups[tag]["task"].append(task)
_populate_tags_and_groups(
config, task, tasks_and_groups, print_info
)
else:
self.logger.debug(f"File {f} in {root} could not be loaded")
......
import tempfile
from pathlib import Path
import pytest
from lm_eval.tasks import TaskManager
@pytest.fixture(scope="module")
def custom_task_name():
    """Provide the task name used for the mock python task.

    The same name is used for the generated YAML and Python file stems.
    """
    task_name = "zzz_my_python_task"
    return task_name
@pytest.fixture(scope="module")
def custom_task_tag():
    """Provide the tag listed in the mock python task's YAML config."""
    tag_name = "zzz-tag"
    return tag_name
@pytest.fixture(scope="module")
def task_yaml(pytestconfig, custom_task_name, custom_task_tag):
    """Yield the YAML text of a task config that points at a Python class.

    The config includes the repository's hellaswag YAML (located relative to
    ``pytestconfig.rootpath``), names the task ``custom_task_name``, references
    ``MockPythonTask`` in a module of the same name, and lists
    ``custom_task_tag`` as its only tag.
    """
    base_config = f"{pytestconfig.rootpath}/lm_eval/tasks/hellaswag/hellaswag.yaml"
    yaml_lines = [
        f"include: {base_config}",
        f"task: {custom_task_name}",
        f"class: !function {custom_task_name}.MockPythonTask",
        "tag:",
        f"- {custom_task_tag}",
    ]
    # Trailing newline keeps the text identical to a newline-terminated file.
    yield "\n".join(yaml_lines) + "\n"
@pytest.fixture(scope="module")
def task_code():
    """Return the source text of a module that defines ``MockPythonTask``.

    The text is written to ``<custom_task_name>.py`` by
    ``custom_task_files_dir``, so it must be valid Python on its own: the
    class and method bodies carry real indentation inside the literal.
    """
    return """
from lm_eval.tasks import ConfigurableTask
class MockPythonTask(ConfigurableTask):
    def __init__(
        self,
        data_dir=None,
        cache_dir=None,
        download_mode=None,
        config=None,
    ) -> None:
        config.pop("class")
        super().__init__(data_dir, cache_dir, download_mode, config)
"""
@pytest.fixture(scope="module")
def custom_task_files_dir(task_yaml, task_code, custom_task_name):
    """Create a temporary directory holding the mock task's YAML and module.

    Writes ``<custom_task_name>.yaml`` and ``<custom_task_name>.py`` into a
    fresh temporary directory and yields that directory as a ``Path``, which
    matches the annotation used by the consuming test. The directory is
    removed when the fixture is torn down.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        task_dir = Path(temp_dir)
        # Explicit encoding so the files do not depend on the platform default.
        (task_dir / f"{custom_task_name}.yaml").write_text(
            task_yaml, encoding="utf-8"
        )
        (task_dir / f"{custom_task_name}.py").write_text(
            task_code, encoding="utf-8"
        )
        yield task_dir
def test_python_task_inclusion(
    custom_task_files_dir: Path, custom_task_name: str, custom_task_tag: str
):
    """Check that a python-class task from ``include_path`` is indexed like a YAML task.

    The task must appear in the task index and the subtask list, its tag must
    be registered, and loading by that tag must return the task.
    """
    include_dir = str(custom_task_files_dir)
    manager = TaskManager(verbosity="INFO", include_path=include_dir)

    # The python task is present in the global task index.
    assert custom_task_name in manager.task_index
    # It is listed among the subtasks.
    assert custom_task_name in manager.all_subtasks
    # Its tag is registered.
    assert custom_task_tag in manager.all_tags
    # Loading by the tag returns the task.
    loaded_by_tag = manager.load_task_or_group(custom_task_tag)
    assert custom_task_name in loaded_by_tag
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment