Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
ac47d481
Unverified
Commit
ac47d481
authored
Feb 13, 2021
by
Leo Gao
Committed by
GitHub
Feb 13, 2021
Browse files
Merge branch 'master' into translation
parents
404530d0
0601c909
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
103 additions
and
1 deletion
+103
-1
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+4
-1
lm_eval/tasks/headqa.py
lm_eval/tasks/headqa.py
+48
-0
lm_eval/tasks/mathqa.py
lm_eval/tasks/mathqa.py
+51
-0
No files found.
lm_eval/tasks/__init__.py
View file @
ac47d481
...
...
@@ -24,7 +24,8 @@ from . import sciq
from
.
import
webqs
from
.
import
qa4mre
from
.
import
translation
from
.
import
headqa
from
.
import
mathqa
TASK_REGISTRY
=
{
# GLUE
...
...
@@ -70,6 +71,8 @@ TASK_REGISTRY = {
# "squad": squad.SQuAD, # not implemented yet
"race"
:
race
.
RACE
,
# "naturalqs": naturalqs.NaturalQs, # not implemented yet
"headqa"
:
headqa
.
HeadQA
,
"mathqa"
:
mathqa
.
MathQA
,
"webqs"
:
webqs
.
WebQs
,
"wsc273"
:
wsc273
.
WinogradSchemaChallenge273
,
"winogrande"
:
winogrande
.
Winogrande
,
...
...
lm_eval/tasks/headqa.py
0 → 100644
View file @
ac47d481
from
.
common
import
HFTask
from
lm_eval.base
import
MultipleChoiceTask
class HeadQA(HFTask, MultipleChoiceTask):
    """Multiple-choice task over the HuggingFace `head_qa` dataset.

    Each raw record is reshaped into the standard multiple-choice doc
    form (``query`` / ``choices`` / ``gold``) expected by
    ``MultipleChoiceTask``.
    """

    DATASET_PATH = "head_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        """Convert one raw dataset record into the standard doc dict."""
        # "ra" is the 1-based index of the right answer in the dataset,
        # so shift it down to the 0-based gold index.
        return {
            "id": doc["qid"],
            "query": "Question: " + doc["qtext"] + "\nAnswer:",
            "choices": [option["atext"] for option in doc["answers"]],
            "gold": int(doc["ra"]) - 1,
        }

    def _load_docs(self, docs):
        # Lazily convert records as the caller consumes them.
        return (self._convert_standard(record) for record in docs)

    def training_docs(self):
        return self._load_docs(super().training_docs())

    def validation_docs(self):
        return self._load_docs(super().validation_docs())

    def test_docs(self):
        return self._load_docs(super().test_docs())

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc):
        return doc["query"]
lm_eval/tasks/mathqa.py
0 → 100644
View file @
ac47d481
from
.
common
import
HFTask
from
lm_eval.base
import
mean
,
rf
,
MultipleChoiceTask
import
re
class MathQA(HFTask, MultipleChoiceTask):
    """Multiple-choice task over the HuggingFace `math_qa` dataset.

    Each raw record is reshaped into the standard multiple-choice doc
    form (``query`` / ``choices`` / ``gold``) expected by
    ``MultipleChoiceTask``.
    """

    DATASET_PATH = "math_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        """Convert one raw dataset record into the standard doc dict."""
        # The dataset stores the correct option as a single letter a-e;
        # map it to a 0-based gold index.
        gold = ["a", "b", "c", "d", "e"].index(doc["correct"])
        # doc['options'] packs every option into one string, presumably of
        # the form "a ) ... , b ) ... , ... , e ) ..." — split it back out,
        # then strip the "x ) " prefix (4 chars) and trailing " ," from each.
        spans = re.findall(r"[abcd] \) .*?, |e \) .*?$", doc["options"])
        answer_texts = [span[4:].rstrip(" ,") for span in spans]
        return {
            "query": "Question: " + doc["Problem"] + "\nAnswer:",
            "choices": answer_texts,
            "gold": gold,
        }

    def _load_docs(self, docs):
        # Lazily convert records as the caller consumes them.
        return (self._convert_standard(record) for record in docs)

    def training_docs(self):
        return self._load_docs(super().training_docs())

    def validation_docs(self):
        return self._load_docs(super().validation_docs())

    def test_docs(self):
        return self._load_docs(super().test_docs())

    def fewshot_description(self):
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc):
        return doc["query"]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment