Commit 85ffb7fc authored by Leo Gao
Browse files

Add underscores to metrics

parent 0538d4c9
...@@ -294,69 +294,69 @@ class TruthfulQAGeneration(Task): ...@@ -294,69 +294,69 @@ class TruthfulQAGeneration(Task):
rougeL_acc = int(rougeL_correct > rougeL_incorrect) rougeL_acc = int(rougeL_correct > rougeL_incorrect)
return { return {
"bleurt max": bleurt_max, "bleurt_max": bleurt_max,
"bleurt acc": bleurt_acc, "bleurt_acc": bleurt_acc,
"bleurt diff": bleurt_diff, "bleurt_diff": bleurt_diff,
"bleu max": bleu_max, "bleu_max": bleu_max,
"bleu acc": bleu_acc, "bleu_acc": bleu_acc,
"bleu diff": bleu_diff, "bleu_diff": bleu_diff,
"rouge1 max": rouge1_max, "rouge1_max": rouge1_max,
"rouge1 acc": rouge1_acc, "rouge1_acc": rouge1_acc,
"rouge1 diff": rouge1_diff, "rouge1_diff": rouge1_diff,
"rouge2 max": rouge2_max, "rouge2_max": rouge2_max,
"rouge2 acc": rouge2_acc, "rouge2_acc": rouge2_acc,
"rouge2 diff": rouge2_diff, "rouge2_diff": rouge2_diff,
"rougeL max": rougeL_max, "rougeL_max": rougeL_max,
"rougeL acc": rougeL_acc, "rougeL_acc": rougeL_acc,
"rougeL diff": rougeL_diff, "rougeL_diff": rougeL_diff,
} }
def aggregation(self): def aggregation(self):
return { return {
"bleurt max": mean, "bleurt_max": mean,
"bleurt acc": mean, "bleurt_acc": mean,
"bleurt diff": mean, "bleurt_diff": mean,
"bleu max": mean, "bleu_max": mean,
"bleu acc": mean, "bleu_acc": mean,
"bleu diff": mean, "bleu_diff": mean,
"rouge1 max": mean, "rouge1_max": mean,
"rouge1 acc": mean, "rouge1_acc": mean,
"rouge1 diff": mean, "rouge1_diff": mean,
"rouge2 max": mean, "rouge2_max": mean,
"rouge2 acc": mean, "rouge2_acc": mean,
"rouge2 diff": mean, "rouge2_diff": mean,
"rougeL max": mean, "rougeL_max": mean,
"rougeL acc": mean, "rougeL_acc": mean,
"rougeL diff": mean, "rougeL_diff": mean,
} }
def higher_is_better(self): def higher_is_better(self):
return { return {
"bleurt max": True, "bleurt_max": True,
"bleurt acc": True, "bleurt_acc": True,
"bleurt diff": True, "bleurt_diff": True,
"bleu max": True, "bleu_max": True,
"bleu acc": True, "bleu_acc": True,
"bleu diff": True, "bleu_diff": True,
"rouge1 max": True, "rouge1_max": True,
"rouge1 acc": True, "rouge1_acc": True,
"rouge1 diff": True, "rouge1_diff": True,
"rouge2 max": True, "rouge2_max": True,
"rouge2 acc": True, "rouge2_acc": True,
"rouge2 diff": True, "rouge2_diff": True,
"rougeL max": True, "rougeL_max": True,
"rougeL acc": True, "rougeL_acc": True,
"rougeL diff": True, "rougeL_diff": True,
} }
{"results": {"truthfulqa_gen": {"bleu acc": 0.0, "bleu acc_stderr": 0.0, "bleu diff": 0.0, "bleu diff_stderr": 0.0, "bleu max": 0.0, "bleu max_stderr": 0.0, "bleurt acc": 0.835985312117503, "bleurt acc_stderr": 0.012962704327492454, "bleurt diff": 0.14077322143090107, "bleurt diff_stderr": 0.005459888909582694, "bleurt max": -1.4399358725752065, "bleurt max_stderr": 0.0022126992369197133, "rouge1 acc": 0.0, "rouge1 acc_stderr": 0.0, "rouge1 diff": 0.0, "rouge1 diff_stderr": 0.0, "rouge1 max": 0.0, "rouge1 max_stderr": 0.0, "rouge2 acc": 0.0, "rouge2 acc_stderr": 0.0, "rouge2 diff": 0.0, "rouge2 diff_stderr": 0.0, "rouge2 max": 0.0, "rouge2 max_stderr": 0.0, "rougeL acc": 0.0, "rougeL acc_stderr": 0.0, "rougeL diff": 0.0, "rougeL diff_stderr": 0.0, "rougeL max": 0.0, "rougeL max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 0}} {"results": {"truthfulqa_gen": {"bleu_acc": 0.0, "bleu_acc_stderr": 0.0, "bleu_diff": 0.0, "bleu_diff_stderr": 0.0, "bleu_max": 0.0, "bleu_max_stderr": 0.0, "bleurt_acc": 0.835985312117503, "bleurt_acc_stderr": 0.012962704327492454, "bleurt_diff": 0.14077322143090107, "bleurt_diff_stderr": 0.005459888909582694, "bleurt_max": -1.4399358725752065, "bleurt_max_stderr": 0.0022126992369197133, "rouge1_acc": 0.0, "rouge1_acc_stderr": 0.0, "rouge1_diff": 0.0, "rouge1_diff_stderr": 0.0, "rouge1_max": 0.0, "rouge1_max_stderr": 0.0, "rouge2_acc": 0.0, "rouge2_acc_stderr": 0.0, "rouge2_diff": 0.0, "rouge2_diff_stderr": 0.0, "rouge2_max": 0.0, "rouge2_max_stderr": 0.0, "rougeL_acc": 0.0, "rougeL_acc_stderr": 0.0, "rougeL_diff": 0.0, "rougeL_diff_stderr": 0.0, "rougeL_max": 0.0, "rougeL_max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 0}}
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment