Commit c2f12474 authored by Stephen Hogg
Browse files

exponentiate unanswerable score; rename dict key

parent 7ed5e29e
...@@ -22,6 +22,7 @@ https://arxiv.org/abs/2105.03011 ...@@ -22,6 +22,7 @@ https://arxiv.org/abs/2105.03011
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
""" """
from math import exp
from lm_eval.base import rf from lm_eval.base import rf
from lm_eval.metrics import f1_score from lm_eval.metrics import f1_score
from .common import HFTask from .common import HFTask
...@@ -101,7 +102,7 @@ class QASPER(HFTask): ...@@ -101,7 +102,7 @@ class QASPER(HFTask):
# Handle unanswerability first # Handle unanswerability first
unanswerable_gold = doc["answer_type"] == "unanswerable" unanswerable_gold = doc["answer_type"] == "unanswerable"
unanswerable_pred = unanswerable > 1 - unanswerable unanswerable_pred = exp(unanswerable) > 1 - exp(unanswerable)
res_dict["f1_un"] = (unanswerable_gold, unanswerable_pred) res_dict["f1_un"] = (unanswerable_gold, unanswerable_pred)
# Handle yes/no questions # Handle yes/no questions
...@@ -113,14 +114,14 @@ class QASPER(HFTask): ...@@ -113,14 +114,14 @@ class QASPER(HFTask):
# Handle completions # Handle completions
if doc["answer_type"] == "free form answer": if doc["answer_type"] == "free form answer":
pass res_dict["f1_ab"] = None
return res_dict return res_dict
def aggregation(self): def aggregation(self):
return { return {
"f1_un": f1_score, "f1_un": f1_score,
"f1_yn": f1_score, "f1_yn": f1_score,
"f1_fr": f1_score, "f1_ab": f1_score,
"f1_ex": f1_score, "f1_ex": f1_score,
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment