Commit 7760573f authored by lintangsutawika's avatar lintangsutawika
Browse files

Enables tasks from different groups to share the same task_alias (for example, if...

Enables tasks from different groups to share the same task_alias (for example, when evaluating on different versions of MMLU).
parent 8ffd2630
......@@ -562,8 +562,6 @@ def evaluate(
task_hierarchy, task_order, versions, task_group_alias
)
_results_agg = collections.defaultdict(dict)
_versions = collections.defaultdict(dict)
for task in results_agg:
task_results = results_agg[task]
......@@ -577,13 +575,9 @@ def evaluate(
if task in task_group_alias:
task_alias = task_group_alias[task]
_results_agg[tab_string + task_alias] = task_results
_versions[tab_string + task_alias] = versions[task]
results_agg[task]["alias"] = tab_string + task_alias
else:
_results_agg[tab_string + task] = task_results
_versions[tab_string + task] = versions[task]
results_agg = _results_agg
versions = _versions
results_agg[task]["alias"] = tab_string + task
_groups_agg = collections.defaultdict(dict)
for group in groups_agg:
......@@ -599,10 +593,9 @@ def evaluate(
if group in task_group_alias:
group_alias = task_group_alias[group]
_groups_agg[tab_string + group_alias] = group_results
groups_agg[group]["alias"] = tab_string + group_alias
else:
_groups_agg[tab_string + group] = group_results
groups_agg = _groups_agg
groups_agg[group]["alias"] = tab_string + group
results_dict = {
"results": dict(results_agg.items()),
......
......@@ -305,6 +305,10 @@ def make_table(result_dict, column: str = "results"):
for k, dic in result_dict[column].items():
version = result_dict["versions"][k]
if "alias" in dic:
k = dic.pop("alias")
for (mf), v in dic.items():
m, _, f = mf.partition(",")
if m.endswith("_stderr"):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment