"yaml-reader/vscode:/vscode.git/clone" did not exist on "9d0e0c6a4871220b1e57aae65ee4b653bba871c8"
Commit 2f0ef6ca authored by lintangsutawika's avatar lintangsutawika
Browse files

fix condition where a benchmark and a regular task are called together

parent 32294ff1
......@@ -219,7 +219,6 @@ def evaluate(
padding_requests = collections.defaultdict(int)
# Stores group related keys and values for group-aggregation
aggregate = collections.defaultdict(dict)
task_groups = collections.defaultdict(dict)
# get lists of each type of request
......@@ -228,6 +227,7 @@ def evaluate(
if type(task) == tuple:
group, task = task
task_groups[task_name] = group
aggregate[task_name] = {}
versions[task_name] = task.VERSION
configs[task_name] = dict(task.dump_config())
......@@ -407,12 +407,12 @@ def evaluate(
# | word_perplexity
# | byte_perplexity
# | bits_per_byte
if bool(task_groups):
if task_name in task_groups:
group_name = task_groups[task_name]
if metric not in aggregate[group_name]:
aggregate[group_name][metric] = [task_score]
else:
if metric in list(aggregate[group_name].keys()):
aggregate[group_name][metric].append(task_score)
else:
aggregate[group_name][metric] = [task_score]
# hotfix: bleu, chrf, ter seem to be really expensive to bootstrap
# so we run them less iterations. still looking for a cleaner way to do this
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment