Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
31b2a5dd
Commit
31b2a5dd
authored
Oct 05, 2020
by
Leo Gao
Browse files
Add WebQs (data only)
parent
6cc21f4b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
0 deletions
+34
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-0
lm_eval/tasks/webqs.py
lm_eval/tasks/webqs.py
+32
-0
No files found.
lm_eval/tasks/__init__.py
View file @
31b2a5dd
...
...
@@ -2,6 +2,7 @@ from . import superglue
from
.
import
glue
from
.
import
arc
from
.
import
race
from
.
import
webqs
TASK_REGISTRY
=
{
"cola"
:
glue
.
CoLA
,
...
...
@@ -21,6 +22,7 @@ TASK_REGISTRY = {
"arc_easy"
:
arc
.
ARCEasy
,
"arc_challenge"
:
arc
.
ARCChallenge
,
"race"
:
race
.
RACE
,
"webqs"
:
webqs
.
WebQs
,
}
...
...
lm_eval/tasks/webqs.py
0 → 100644
View file @
31b2a5dd
from
.
common
import
HFNLPTask
class WebQs(HFNLPTask):
    """WebQuestions question-answering task (data only; evaluation is pending).

    Formats each document as a ``Question: ... / Answer: ...`` prompt.
    Scoring is not implemented yet, so ``evaluate`` raises.
    """

    # Dataset identifiers consumed by the HFNLPTask base class
    # (presumably forwarded to the dataset loader -- confirm in common.py).
    NLP_PATH = "web_questions"
    NLP_NAME = None

    def has_training_docs(self):
        """Return True: this task declares a training split."""
        return True

    def has_validation_docs(self):
        """Return False: this task declares no validation split."""
        return False

    def has_test_docs(self):
        """Return True: this task declares a test split."""
        return True

    def fewshot_description(self):
        """Return the few-shot preamble text (currently empty)."""
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc, include_target=True):
        """Render one document as a question/answer prompt.

        Args:
            doc: Mapping with a ``'question'`` string and an ``'answers'``
                sequence of strings.
            include_target: When true, append the first listed answer after
                ``"Answer:"``; otherwise leave the answer slot open.

        Returns:
            The prompt string ``"Question: <question>\\nAnswer:[ <answer>]"``.

        Raises:
            IndexError: If ``include_target`` is true and ``doc['answers']``
                is empty.
        """
        # The stray debug print of the raw doc was removed: it flooded stdout
        # once per prompt built.
        question_part = "Question: " + doc['question'] + '\n'

        # this picks one answer to be the "correct" one, despite sometimes
        # multiple correct answers being possible.
        # TODO: make sure we're actually handling multi-answer correctly
        answer_part = "Answer:" + ((" " + doc['answers'][0]) if include_target else '')
        return question_part + answer_part

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        """Evaluate ``lm`` on ``docs``; not implemented yet.

        Raises:
            NotImplementedError: Always, until scoring is written.
        """
        # TODO: implement
        raise NotImplementedError()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment