Commit 0eb94c8b authored by lintangsutawika
Browse files

process aggregate fix

parent b2598de8
...@@ -386,9 +386,6 @@ def evaluate( ...@@ -386,9 +386,6 @@ def evaluate(
task_score = task.aggregation()[metric](items) task_score = task.aggregation()[metric](items)
results[task_name][metric + "," + key] = task_score results[task_name][metric + "," + key] = task_score
# if task_name not in benchmark_agg:
# benchmark[] = [task_score]
# Need to put back in results # Need to put back in results
# pythia | acc # pythia | acc
# | perplexity # | perplexity
...@@ -415,7 +412,7 @@ def evaluate( ...@@ -415,7 +412,7 @@ def evaluate(
if stderr is not None: if stderr is not None:
results[task_name][metric + "_stderr" + "," + key] = stderr(items) results[task_name][metric + "_stderr" + "," + key] = stderr(items)
if not bool(aggregate): if bool(aggregate):
for group in aggregate.keys(): for group in aggregate.keys():
for metric in aggregate[group].keys(): for metric in aggregate[group].keys():
aggregate[group][metric] = np.average(aggregate[group][metric]) aggregate[group][metric] = np.average(aggregate[group][metric])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment