Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
f7aaff08
Commit
f7aaff08
authored
Feb 22, 2022
by
Stephen Hogg
Browse files
Mark unanswerable as TODO
parent
be55ea8c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
6 deletions
+5
-6
lm_eval/tasks/qasper.py
lm_eval/tasks/qasper.py
+5
-6
No files found.
lm_eval/tasks/qasper.py
View file @
f7aaff08
...
...
@@ -24,6 +24,7 @@ https://arxiv.org/abs/2105.03011
"""
from
collections
import
Counter
from
math
import
exp
import
random
import
re
import
string
from
lm_eval.base
import
rf
...
...
@@ -157,10 +158,10 @@ class QASPER(HFTask):
ll_yes
,
ll_no
,
(
logprob_unanswerable
,
_
)
=
results
res_dict
=
{}
# Handle unanswerability first
unanswerable_gold
=
doc
[
"answer_type"
]
==
"unanswerable"
unanswerable_pred
=
exp
(
logprob_unanswerable
)
>
1
-
exp
(
logprob_unanswerable
)
res_dict
[
"f1_unanswerable"
]
=
(
unanswerable_gold
,
unanswerable_pred
)
#
TODO:
Handle unanswerability first
#
unanswerable_gold = doc["answer_type"] == "unanswerable"
#
unanswerable_pred = exp(logprob_unanswerable)
#
res_dict["f1_unanswerable"] = (unanswerable_gold, unanswerable_pred)
# Handle yes/no questions
if
doc
[
"answer_type"
]
==
"bool"
:
...
...
@@ -179,7 +180,6 @@ class QASPER(HFTask):
def
aggregation
(
self
):
return
{
"f1_unanswerable"
:
f1_score
,
"f1_yesno"
:
f1_score
,
"f1_abstractive"
:
mean
,
}
...
...
@@ -212,7 +212,6 @@ class QASPER(HFTask):
whether a higher value of the submetric is better
"""
return
{
"f1_unanswerable"
:
True
,
"f1_yesno"
:
True
,
"f1_abstractive"
:
True
,
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment