Commit 2a817535 authored by lintangsutawika
Browse files

add version for groups

parent 9f698c20
...@@ -528,6 +528,7 @@ def evaluate( ...@@ -528,6 +528,7 @@ def evaluate(
def process_group( def process_group(
results, results,
versions,
task_dict, task_dict,
task_root=None, task_root=None,
task_hierarchy=None, task_hierarchy=None,
...@@ -556,8 +557,9 @@ def evaluate( ...@@ -556,8 +557,9 @@ def evaluate(
group_or_task group_or_task
) )
else: else:
results, _task_hierarchy, show_group_table = process_group( results, versions, _task_hierarchy, show_group_table = process_group(
results, results,
versions,
group_or_task_info, group_or_task_info,
group_or_task, group_or_task,
task_hierarchy, task_hierarchy,
...@@ -620,10 +622,11 @@ def evaluate( ...@@ -620,10 +622,11 @@ def evaluate(
# results[group][stderr] = lm_eval.api.metrics.combined_sample_stderr(stderrs, sizes, metrics=metrics) # results[group][stderr] = lm_eval.api.metrics.combined_sample_stderr(stderrs, sizes, metrics=metrics)
results[group_or_task]["samples"] = sum(sizes) results[group_or_task]["samples"] = sum(sizes)
return results, task_hierarchy, show_group_table versions[group_or_task] = group_config["version"]
return results, versions, task_hierarchy, show_group_table
results, task_hierarchy, show_group_table = process_group( results, versions, task_hierarchy, show_group_table = process_group(
results, task_dict results, versions, task_dict
) )
results_agg, group_agg = prepare_print_tasks(task_dict, results) results_agg, group_agg = prepare_print_tasks(task_dict, results)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment