Unverified commit 9ef853ac, authored by Lintang Sutawika and committed by GitHub
Browse files

Revert "Simplified `evaluator.py`" (#1116)

parent 72e583d5
...@@ -234,7 +234,8 @@ def evaluate( ...@@ -234,7 +234,8 @@ def evaluate(
padding_requests = collections.defaultdict(int) padding_requests = collections.defaultdict(int)
# store the hierarchy to do proper ordering # store the hierarchy to do proper ordering
task_hierarchy = collections.defaultdict(list) task_hierarchy = collections.defaultdict(list)
# store task aliases # store the ordering of tasks and groups
task_order = collections.defaultdict(int)
task_group_alias = collections.defaultdict(dict) task_group_alias = collections.defaultdict(dict)
# store num-fewshot value per task # store num-fewshot value per task
num_fewshot = collections.defaultdict(int) num_fewshot = collections.defaultdict(int)
...@@ -439,6 +440,32 @@ def evaluate( ...@@ -439,6 +440,32 @@ def evaluate(
vals = vals_torch vals = vals_torch
if lm.rank == 0: if lm.rank == 0:
### Get task ordering for correct sample-wide aggregation
group_to_task = {}
for group in task_hierarchy.keys():
if group not in task_order:
task_order[group] = 0
if len(task_hierarchy[group]) > 0:
group_to_task[group] = task_hierarchy[group].copy()
for task in task_hierarchy[group]:
if task in task_order:
task_order[task] += 1
else:
task_order[task] = 1 + task_order[group]
if task in task_hierarchy:
group_to_task[group].remove(task)
group_to_task[group].extend(task_hierarchy[task])
task_to_group = {}
for group in group_to_task:
for task in group_to_task[group]:
if task in task_to_group:
task_to_group[task].append(group)
else:
task_to_group[task] = [group]
### Aggregate results over all datapoints ### ### Aggregate results over all datapoints ###
# aggregate results ; run bootstrap CIs # aggregate results ; run bootstrap CIs
...@@ -526,36 +553,37 @@ def evaluate( ...@@ -526,36 +553,37 @@ def evaluate(
results[group]["samples"] = total_size results[group]["samples"] = total_size
def print_tasks(task_hierarchy, tab=0): def print_tasks(task_hierarchy, task_order, task_version, task_group_alias):
results_agg = collections.defaultdict(dict) results_agg = collections.defaultdict(dict)
groups_agg = collections.defaultdict(dict) groups_agg = collections.defaultdict(dict)
for group_name, task_list in task_hierarchy.items():
order = task_order[group_name]
results_agg[group_name] = results[group_name].copy()
results_agg[group_name]["tab"] = order
(group_name, task_list), *_ = task_hierarchy.items() if (order < max(task_order.values())) and (len(task_list) > 0):
task_list = sorted(task_list) groups_agg[group_name] = results[group_name].copy()
groups_agg[group_name]["tab"] = order
results_agg[group_name] = results[group_name].copy()
results_agg[group_name]["tab"] = tab
if len(task_list) > 0: if task_list != []:
groups_agg[group_name] = results[group_name].copy() for task in sorted(task_list):
groups_agg[group_name]["tab"] = tab if task in task_hierarchy:
_task_hierarchy = {task: task_hierarchy[task]}
else:
_task_hierarchy = {task: []}
for task_name in task_list: _results_agg, _groups_agg, task_version = print_tasks(
if task_name in task_hierarchy: _task_hierarchy, task_order, task_version, task_group_alias
_task_hierarchy = { )
**{task_name: task_hierarchy[task_name]},
**task_hierarchy,
}
else:
_task_hierarchy = {task_name: []}
_results_agg, _groups_agg = print_tasks(_task_hierarchy, tab + 1) results_agg = {**results_agg, **_results_agg}
results_agg = {**results_agg, **_results_agg} groups_agg = {**groups_agg, **_groups_agg}
groups_agg = {**groups_agg, **_groups_agg}
return results_agg, groups_agg return results_agg, groups_agg, task_version
results_agg, groups_agg = print_tasks(task_hierarchy) results_agg, groups_agg, versions = print_tasks(
task_hierarchy, task_order, versions, task_group_alias
)
for task in results_agg: for task in results_agg:
task_results = results_agg[task] task_results = results_agg[task]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment