Commit c6b8132d authored by lintangsutawika
Browse files

Merge branch 'group-agg-rework' of...

Merge branch 'group-agg-rework' of https://github.com/EleutherAI/lm-evaluation-harness into group-agg-rework
parents 2f2322b9 9f06432c
...@@ -541,15 +541,13 @@ def evaluate( ...@@ -541,15 +541,13 @@ def evaluate(
task_hierarchy = {} task_hierarchy = {}
for group_or_task, group_or_task_info in task_dict.items(): for group_or_task, group_or_task_info in task_dict.items():
# Convert to string
if isinstance(group_or_task, ConfigurableGroup): if isinstance(group_or_task, ConfigurableGroup):
group_config = group_or_task.config group_config = group_or_task.config
group_or_task = group_or_task.group group_or_task = group_or_task.group
show_group_table = ( else:
show_group_table | group_config["aggregate_metric"] group_config = None
)
if group_config["aggregate_metric"] is False:
results[group_or_task][" "] = " "
continue
if isinstance(group_or_task_info, ConfigurableTask): if isinstance(group_or_task_info, ConfigurableTask):
if task_root: if task_root:
...@@ -570,6 +568,14 @@ def evaluate( ...@@ -570,6 +568,14 @@ def evaluate(
task_hierarchy.get(group_or_task, []) task_hierarchy.get(group_or_task, [])
) )
if (group_config is not None) and (group_config["aggregate_metric"] is False):
results[group_or_task][" "] = " "
continue
show_group_table = (
show_group_table | group_config["aggregate_metric"]
)
task_list = _task_hierarchy[group_or_task] task_list = _task_hierarchy[group_or_task]
metric_list = list( metric_list = list(
{ {
......
...@@ -192,7 +192,9 @@ class TaskManager: ...@@ -192,7 +192,9 @@ class TaskManager:
} }
subtask_list = self._get_tasklist(name) subtask_list = self._get_tasklist(name)
if subtask_list == -1: if subtask_list == -1:
subtask_list = self._get_config(name)["task"] group_config = self._get_config(name)
subtask_list = group_config["task"]
group_name = ConfigurableGroup(config=group_config)
else: else:
if self._name_is_registered(name): if self._name_is_registered(name):
base_task_config = self._get_config(name) base_task_config = self._get_config(name)
......
group: truthfulqa
group_alias: Truthful QA
task:
- truthfulqa_gen
- truthfulqa_mc1
- truthfulqa_mc2
group:
- truthfulqa
task: truthfulqa_gen task: truthfulqa_gen
dataset_path: truthful_qa dataset_path: truthful_qa
dataset_name: generation dataset_name: generation
......
group:
- truthfulqa
task: truthfulqa_mc1 task: truthfulqa_mc1
dataset_path: truthful_qa dataset_path: truthful_qa
dataset_name: multiple_choice dataset_name: multiple_choice
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment