Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
7614a8f3
Unverified
Commit
7614a8f3
authored
Feb 09, 2021
by
Leo Gao
Committed by
GitHub
Feb 09, 2021
Browse files
Merge pull request #139 from jeffhsu3/pubmedqa
QA4MRE Task
parents
e9e5d0a2
5960874b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
104 additions
and
5 deletions
+104
-5
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+6
-0
lm_eval/tasks/qa4mre.py
lm_eval/tasks/qa4mre.py
+92
-0
lm_eval/tasks/sciq.py
lm_eval/tasks/sciq.py
+6
-5
No files found.
lm_eval/tasks/__init__.py
View file @
7614a8f3
...
...
@@ -20,6 +20,7 @@ from . import triviaqa
from
.
import
pubmedqa
from
.
import
sciq
from
.
import
webqs
from
.
import
qa4mre
TASK_REGISTRY
=
{
...
...
@@ -48,8 +49,13 @@ TASK_REGISTRY = {
"lambada"
:
lambada
.
LAMBADA
,
"piqa"
:
piqa
.
PiQA
,
# Science related
"pubmedqa"
:
pubmedqa
.
Pubmed_QA
,
"sciq"
:
sciq
.
SciQ
,
#"qa4mre" : qa4mre.QA4MRE,
"qa4mre_2011"
:
qa4mre
.
QA4MRE_2011
,
"qa4mre_2012"
:
qa4mre
.
QA4MRE_2012
,
"qa4mre_2013"
:
qa4mre
.
QA4MRE_2013
,
#"triviaqa": triviaqa.TriviaQA,
"arc_easy"
:
arc
.
ARCEasy
,
...
...
lm_eval/tasks/qa4mre.py
0 → 100644
View file @
7614a8f3
import
os
import
numpy
as
np
from
best_download
import
download_file
from
lm_eval.base
import
MultipleChoiceTask
,
rf
,
mean
import
xml.etree.ElementTree
as
ET
import
random
class QA4MRE(MultipleChoiceTask):
    """Multiple-choice reading-comprehension task over a QA4MRE gold-standard XML file.

    Subclasses select the dataset edition by setting ``YEAR`` to one of the
    years handled in ``download`` (2011, 2012 or 2013). Only test documents
    are provided; few-shot examples are therefore sampled from the test set.
    """

    # Dataset edition; must be overridden by concrete subclasses.
    YEAR = None

    def download(self):
        """Fetch the English gold-standard XML for ``self.YEAR`` if it is not cached.

        The file is stored as ``data/qa4mre/QA4MRE-{YEAR}-EN_GS.xml`` and the
        expected SHA-256 checksum is passed to ``download_file``.

        Raises:
            KeyError: if ``self.YEAR`` is not 2011, 2012 or 2013.
        """
        year = self.YEAR
        lang = "EN"
        base_path = (
            "http://nlp.uned.es/clef-qa/repository/js/scripts/downloadFile.php?"
            "file=/var/www/html/nlp/clef-qa/repository/resources/QA4MRE/"
        )
        # TODO: add side tasks?
        variable_year_path = {
            2011: '2011/Training_Data/Goldstandard/',
            2012: '2012/Main_Task/Training_Data/Goldstandard/Used_in_Evaluation/',
            2013: '2013/Main_Task/Training_Data/Goldstandard/',
        }
        sha256sums = {
            2011: "6d2524952a3a015f2a82df785b85b5578681e3602ec276b4e72c01f4ebc50034",
            2012: "f9edaf408f8ac93f89a643a0d0b19263a1bb5ce64f19b2af10df279a656dfb24",
            2013: "c60e5aa4ec77e0493ef0b11d46bd1d74d58a499a3a2f871b8cf3af9536f0f094",
        }
        vpath = variable_year_path[year]
        url_path = f"{base_path}{vpath}QA4MRE-{year}-{lang}_GS.xml"
        # Single source of truth for the cached file name: the existence check
        # and the download target must agree, otherwise the file is
        # re-downloaded on every call.
        local_path = f"data/qa4mre/QA4MRE-{year}-{lang}_GS.xml"

        # makedirs also creates the parent "data" directory when it is missing.
        os.makedirs("data/qa4mre", exist_ok=True)
        if not os.path.isfile(local_path):
            download_file(
                url_path,
                local_path,
                checksum=sha256sums[year],
            )

    def has_training_docs(self):
        """Return False: no training split is provided."""
        return False

    def has_validation_docs(self):
        """Return False: no validation split is provided."""
        return False

    def has_test_docs(self):
        """Return True: the gold-standard file is served as the test split."""
        return True

    def fewshot_examples(self, k):
        """Return ``k`` documents sampled without replacement from the test set.

        Only test docs exist for this task, so they double as the few-shot
        pool. The materialized list is cached on ``self._training_docs``.
        """
        # NOTE(review): assumes the base class initializes
        # ``self._training_docs`` to None -- confirm against lm_eval.base.
        if self._training_docs is None:
            self._training_docs = list(self.test_docs())
        return random.sample(self._training_docs, k)

    def _convert_standard(self, question):
        """Convert one ``<q>`` XML element into a document dict.

        Returns a dict with:
            "query":   text of the ``q_str`` child,
            "choices": text of every ``answer`` descendant, in document order,
            "gold":    ``a_id`` of the answer marked ``correct='Yes'``, minus 1.
        """
        choices = [answer.text for answer in question.iter('answer')]
        # The code treats a_id as 1-based and shifts it to index into choices.
        correct_answer = question.find("./answer[@correct='Yes']")
        out_doc = {
            "query": question.find('q_str').text,
            "choices": choices,
            "gold": int(correct_answer.attrib["a_id"]) - 1,
        }
        return out_doc

    def load_docs(self, textfilename, tfds=False):
        """Yield one document dict per question found in ``textfilename``.

        Each dict is the output of ``_convert_standard`` plus a "source" key
        holding the stripped text of the first child of the enclosing
        ``reading-test`` element. ``tfds`` is accepted but unused.
        """
        tree = ET.parse(textfilename)
        root = tree.getroot()
        # TODO: the source passage is sometimes much larger than the model's
        # context window; at the moment it just gets left-truncated by the LM
        # automatically, and maybe that's good enough?
        for reading_test in root.iter('reading-test'):
            src = reading_test[0].text
            # NOTE(review): "\'" and "'" are the same string in Python, so
            # this replace is a no-op; presumably it was meant to unescape
            # backslash-escaped apostrophes -- confirm before changing.
            src = src.strip().replace("\'", "'")
            for question in reading_test.iter('q'):
                out_doc = self._convert_standard(question)
                out_doc['source'] = src
                yield out_doc

    def fewshot_description(self):
        """Return the task description for few-shot prompts (empty here)."""
        return ""

    def test_docs(self):
        """Return a generator over the documents of the downloaded gold-standard file."""
        return self.load_docs(f"data/qa4mre/QA4MRE-{self.YEAR}-EN_GS.xml")

    def doc_to_text(self, doc):
        """Format a document as source passage, question, and an answer cue."""
        return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"])
class QA4MRE_2011(QA4MRE):
    """QA4MRE task bound to the 2011 edition of the dataset."""

    YEAR = 2011
class QA4MRE_2012(QA4MRE):
    """QA4MRE task bound to the 2012 edition of the dataset."""

    YEAR = 2012
class QA4MRE_2013(QA4MRE):
    """QA4MRE task bound to the 2013 edition of the dataset."""

    YEAR = 2013
lm_eval/tasks/sciq.py
View file @
7614a8f3
...
...
@@ -3,6 +3,7 @@ import json
from
..utils
import
sh
from
lm_eval.base
import
MultipleChoiceTask
,
rf
,
mean
import
zipfile
from
best_download
import
download_file
class
SciQ
(
MultipleChoiceTask
):
...
...
@@ -10,9 +11,11 @@ class SciQ(MultipleChoiceTask):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/sciq'
):
os
.
mkdir
(
'data/sciq'
)
sh
((
"wget https://ai2-public-datasets.s3.amazonaws.com/sciq/SciQ.zip -O data/sciq/SciQ.zip"
))
download_file
(
'https://ai2-public-datasets.s3.amazonaws.com/sciq/SciQ.zip'
,
'data/sciq/SciQ.zip'
,
'7f3312f6ac6b09970b32942d106a8c44ec0dad46a0369f17d635aff8e348a87c'
,
)
with
zipfile
.
ZipFile
(
"data/sciq/SciQ.zip"
,
"r"
)
as
zf
:
zf
.
extractall
(
"data/sciq/"
)
...
...
@@ -48,8 +51,6 @@ class SciQ(MultipleChoiceTask):
yield
self
.
_convert_standard
(
record
)
def
fewshot_description
(
self
):
# Average ctx length in labelled dataset is 238.9
# 2 few-shot examples push it beyond the context window
return
""
def
training_docs
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment