Commit 2d96a8c8 authored by lintangsutawika
Browse files

add condition if --task is not a benchmark

parent ed304c1d
...@@ -398,11 +398,12 @@ def evaluate( ...@@ -398,11 +398,12 @@ def evaluate(
# | word_perplexity # | word_perplexity
# | byte_perplexity # | byte_perplexity
# | bits_per_byte # | bits_per_byte
group_name = task_groups[task_name] if bool(task_groups):
if metric not in aggregate[group_name]: group_name = task_groups[task_name]
aggregate[group_name][metric] = [task_score] if metric not in aggregate[group_name]:
else: aggregate[group_name][metric] = [task_score]
aggregate[group_name][metric].append(task_score) else:
aggregate[group_name][metric].append(task_score)
# hotfix: bleu, chrf, ter seem to be really expensive to bootstrap # hotfix: bleu, chrf, ter seem to be really expensive to bootstrap
# so we run them less iterations. still looking for a cleaner way to do this # so we run them less iterations. still looking for a cleaner way to do this
...@@ -417,14 +418,15 @@ def evaluate( ...@@ -417,14 +418,15 @@ def evaluate(
if stderr is not None: if stderr is not None:
results[task_name][metric + "_stderr" + "," + key] = stderr(items) results[task_name][metric + "_stderr" + "," + key] = stderr(items)
for group in aggregate.keys(): if not bool(aggregate):
for metric in aggregate[group].keys(): for group in aggregate.keys():
aggregate[group][metric] = np.average(aggregate[group][metric]) for metric in aggregate[group].keys():
versions[group] = "N/A" aggregate[group][metric] = np.average(aggregate[group][metric])
versions[group] = "N/A"
results_dict = { results_dict = {
"results": dict(results), "results": dict(results),
"aggregate": dict(aggregate), **({"aggregate": dict(aggregate)} if bool(aggregate) else {}),
"configs": dict(configs), "configs": dict(configs),
"versions": dict(versions), "versions": dict(versions),
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment