Commit 85ffb7fc authored by Leo Gao
Browse files

Add underscores to metrics

parent 0538d4c9
......@@ -294,69 +294,69 @@ class TruthfulQAGeneration(Task):
rougeL_acc = int(rougeL_correct > rougeL_incorrect)
return {
"bleurt max": bleurt_max,
"bleurt acc": bleurt_acc,
"bleurt diff": bleurt_diff,
"bleurt_max": bleurt_max,
"bleurt_acc": bleurt_acc,
"bleurt_diff": bleurt_diff,
"bleu max": bleu_max,
"bleu acc": bleu_acc,
"bleu diff": bleu_diff,
"bleu_max": bleu_max,
"bleu_acc": bleu_acc,
"bleu_diff": bleu_diff,
"rouge1 max": rouge1_max,
"rouge1 acc": rouge1_acc,
"rouge1 diff": rouge1_diff,
"rouge1_max": rouge1_max,
"rouge1_acc": rouge1_acc,
"rouge1_diff": rouge1_diff,
"rouge2 max": rouge2_max,
"rouge2 acc": rouge2_acc,
"rouge2 diff": rouge2_diff,
"rouge2_max": rouge2_max,
"rouge2_acc": rouge2_acc,
"rouge2_diff": rouge2_diff,
"rougeL max": rougeL_max,
"rougeL acc": rougeL_acc,
"rougeL diff": rougeL_diff,
"rougeL_max": rougeL_max,
"rougeL_acc": rougeL_acc,
"rougeL_diff": rougeL_diff,
}
def aggregation(self):
    """Map every reported metric name to the aggregation function ``mean``.

    Names are built from five score families (bleurt, bleu, rouge1, rouge2,
    rougeL) and three suffixes (max, acc, diff). Each family/suffix pair
    appears twice: once joined by a space and once joined by an underscore.

    NOTE(review): the surrounding commit text reads "Add underscores to
    metrics", so the space-joined names may be pre-rename leftovers --
    confirm before relying on them.
    """
    families = ("bleurt", "bleu", "rouge1", "rouge2", "rougeL")
    suffixes = ("max", "acc", "diff")
    # Nesting order (family, then joiner, then suffix) reproduces the key
    # order of the hand-written literal this replaces.
    return {
        family + joiner + suffix: mean
        for family in families
        for joiner in (" ", "_")
        for suffix in suffixes
    }
def higher_is_better(self):
    """Flag every reported metric name as one where larger values are better.

    Covers the same names as the other metric tables: five score families
    (bleurt, bleu, rouge1, rouge2, rougeL) crossed with three suffixes
    (max, acc, diff), each in a space-joined and an underscore-joined
    spelling. Every name maps to ``True``.

    NOTE(review): the surrounding commit text reads "Add underscores to
    metrics", so the space-joined names may be pre-rename leftovers --
    confirm before relying on them.
    """
    metric_names = []
    for family in ("bleurt", "bleu", "rouge1", "rouge2", "rougeL"):
        # Space-joined spelling first, then underscore-joined, matching the
        # order of the literal this replaces.
        for joiner in (" ", "_"):
            metric_names.extend(
                family + joiner + suffix for suffix in ("max", "acc", "diff")
            )
    return dict.fromkeys(metric_names, True)
{"results": {"truthfulqa_gen": {"bleu acc": 0.0, "bleu acc_stderr": 0.0, "bleu diff": 0.0, "bleu diff_stderr": 0.0, "bleu max": 0.0, "bleu max_stderr": 0.0, "bleurt acc": 0.835985312117503, "bleurt acc_stderr": 0.012962704327492454, "bleurt diff": 0.14077322143090107, "bleurt diff_stderr": 0.005459888909582694, "bleurt max": -1.4399358725752065, "bleurt max_stderr": 0.0022126992369197133, "rouge1 acc": 0.0, "rouge1 acc_stderr": 0.0, "rouge1 diff": 0.0, "rouge1 diff_stderr": 0.0, "rouge1 max": 0.0, "rouge1 max_stderr": 0.0, "rouge2 acc": 0.0, "rouge2 acc_stderr": 0.0, "rouge2 diff": 0.0, "rouge2 diff_stderr": 0.0, "rouge2 max": 0.0, "rouge2 max_stderr": 0.0, "rougeL acc": 0.0, "rougeL acc_stderr": 0.0, "rougeL diff": 0.0, "rougeL diff_stderr": 0.0, "rougeL max": 0.0, "rougeL max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 0}}
\ No newline at end of file
{"results": {"truthfulqa_gen": {"bleu_acc": 0.0, "bleu_acc_stderr": 0.0, "bleu_diff": 0.0, "bleu_diff_stderr": 0.0, "bleu_max": 0.0, "bleu_max_stderr": 0.0, "bleurt_acc": 0.835985312117503, "bleurt_acc_stderr": 0.012962704327492454, "bleurt_diff": 0.14077322143090107, "bleurt_diff_stderr": 0.005459888909582694, "bleurt_max": -1.4399358725752065, "bleurt_max_stderr": 0.0022126992369197133, "rouge1_acc": 0.0, "rouge1_acc_stderr": 0.0, "rouge1_diff": 0.0, "rouge1_diff_stderr": 0.0, "rouge1_max": 0.0, "rouge1_max_stderr": 0.0, "rouge2_acc": 0.0, "rouge2_acc_stderr": 0.0, "rouge2_diff": 0.0, "rouge2_diff_stderr": 0.0, "rouge2_max": 0.0, "rouge2_max_stderr": 0.0, "rougeL_acc": 0.0, "rougeL_acc_stderr": 0.0, "rougeL_diff": 0.0, "rougeL_diff_stderr": 0.0, "rougeL_max": 0.0, "rougeL_max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 0}}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment