Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
79492627
Commit
79492627
authored
Feb 08, 2022
by
Stephen Hogg
Browse files
Add initial draft of QASPER; register with package; yet to complete process_results
parent
05590e11
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
92 additions
and
0 deletions
+92
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+3
-0
lm_eval/tasks/qasper.py
lm_eval/tasks/qasper.py
+89
-0
No files found.
lm_eval/tasks/__init__.py
View file @
79492627
...
@@ -29,6 +29,7 @@ from . import triviaqa
...
@@ -29,6 +29,7 @@ from . import triviaqa
from
.
import
pubmedqa
from
.
import
pubmedqa
from
.
import
sciq
from
.
import
sciq
from
.
import
webqs
from
.
import
webqs
from
.
import
qasper
from
.
import
qa4mre
from
.
import
qa4mre
from
.
import
translation
from
.
import
translation
from
.
import
headqa
from
.
import
headqa
...
@@ -121,6 +122,8 @@ TASK_REGISTRY = {
...
@@ -121,6 +122,8 @@ TASK_REGISTRY = {
"pubmedqa"
:
pubmedqa
.
Pubmed_QA
,
"pubmedqa"
:
pubmedqa
.
Pubmed_QA
,
"sciq"
:
sciq
.
SciQ
,
"sciq"
:
sciq
.
SciQ
,
"qasper"
:
qasper
.
QASPER
,
"qa4mre_2011"
:
qa4mre
.
QA4MRE_2011
,
"qa4mre_2011"
:
qa4mre
.
QA4MRE_2011
,
"qa4mre_2012"
:
qa4mre
.
QA4MRE_2012
,
"qa4mre_2012"
:
qa4mre
.
QA4MRE_2012
,
"qa4mre_2013"
:
qa4mre
.
QA4MRE_2013
,
"qa4mre_2013"
:
qa4mre
.
QA4MRE_2013
,
...
...
lm_eval/tasks/qasper.py
0 → 100644
View file @
79492627
from
lm_eval.base
import
rf
from
.common
import
HFTask
class QASPER(HFTask):
    """Question answering over paper titles and abstracts ("qasper" dataset).

    Every source document is flattened by `process_doc` into one record per
    (question, answer) pair. For each record the task requests a greedy
    generation that stops at the first newline, plus the log-likelihood of
    the continuation " unanswerable".
    """

    VERSION = 0
    DATASET_PATH = "qasper"
    DATASET_NAME = None

    def doc_to_text(self, doc):
        """Build the prompt for one flattened record produced by `process_doc`.

        :param doc: dict with the string keys "title", "abstract" and "question".
        :return: str
            The prompt, ending with "A: " so the answer follows directly.
        """
        # this method is invoked by tests only
        return (
            "TITLE: "
            + doc["title"]
            + "\n"
            + "ABSTRACT: "
            + doc["abstract"]
            + "\n\n"
            + "Q: "
            + doc["question"]
            + "\n\n"
            + "A: "
        )

    def doc_to_target(self, doc):
        """Return the gold answer of a flattened record, prefixed by a space.

        :param doc: dict with the string key "answer_str".
        """
        # this method is invoked by tests only
        return " " + doc["answer_str"]

    def training_docs(self):
        """Yield the flattened records of every document in the "train" split."""
        for doc in self.data["train"]:
            yield from self.process_doc(doc)

    def validation_docs(self):
        """Yield the flattened records of every document in the "validation" split."""
        # Fixed: this previously iterated the "train" split, so validation
        # would have been run on the training data.
        for doc in self.data["validation"]:
            yield from self.process_doc(doc)

    @staticmethod
    def _categorise_answer(answer):
        """Map one answer annotation to an `(answer_str, answer_type)` pair.

        The first matching field wins, checked in this order: "unanswerable",
        "yes_no", "free_form_answer", "extractive_spans".

        :raises ValueError: if none of the fields carries an answer.
        """
        if answer["unanswerable"]:
            return "unanswerable", "unanswerable"
        if answer["yes_no"]:
            return "Yes", "bool"
        # NOTE(review): this treats any non-None falsy `yes_no` as "No" before
        # the free-form / extractive fields are looked at -- confirm that
        # non-boolean answers carry `yes_no = None` in the loaded dataset.
        if answer["yes_no"] is not None:
            return "No", "bool"
        if answer["free_form_answer"]:
            return answer["free_form_answer"], "free form answer"
        if answer["extractive_spans"]:
            return ", ".join(answer["extractive_spans"]), "extractive spans"
        # Without this guard the caller would hit an unbound local on the
        # first answer, or silently reuse the previous answer's values.
        raise ValueError("QASPER answer annotation does not contain an answer")

    def process_doc(self, doc):
        """Given a `doc`, flatten it out so that each JSON blob
        contains exactly one question and one answer. Logic taken from
        the reference implementation available at
        https://github.com/allenai/qasper-led-baseline/blob/main/scripts/evaluator.py

        :param doc: dict with the keys "title", "abstract" and "qas".
        :return: list[dict]
            One dict per (question, answer) pair, with the keys "title",
            "abstract", "question", "answer_str" and "answer_type".
        :raises ValueError: if an answer annotation carries no answer.
        """
        # NOTE(review): the traversal below assumes `doc["qas"]` iterates over
        # mappings that each hold parallel "question" / "answers" sequences --
        # verify against the schema of the loaded dataset.
        obs_list = []
        for qa in doc["qas"]:
            for question, answer_list in zip(qa["question"], qa["answers"]):
                for answer in answer_list:
                    answer_str, answer_type = self._categorise_answer(answer)
                    # Fixed: `append[...]` subscripted the method instead of
                    # calling it, raising TypeError on the first answer.
                    obs_list.append(
                        {
                            "title": doc["title"],
                            "abstract": doc["abstract"],
                            "question": question,
                            "answer_str": answer_str,
                            "answer_type": answer_type,
                        }
                    )
        return obs_list

    def process_results(self, doc, results):
        """Delegate result processing to the parent class.

        TODO: QASPER-specific scoring is not implemented yet.
        """
        return super().process_results(doc, results)

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, as well as the few shot examples, and the question
            part of the document for `doc`.
        :return: tuple
            `(continuation, is_unanswerable)`: a greedy generation stopped at
            the first newline, and the log-likelihood of " unanswerable".
        """
        continuation = rf.greedy_until(ctx, ["\n"])
        is_unanswerable = rf.loglikelihood(ctx, " " + "unanswerable")
        return continuation, is_unanswerable
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment