Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9017bb36
"vscode:/vscode.git/clone" did not exist on "369108e1ad6049aff50711b2499a7f59b2615dc4"
Unverified
Commit
9017bb36
authored
Oct 22, 2020
by
Charles Foster
Committed by
GitHub
Oct 22, 2020
Browse files
Merge branch 'master' into quac
parents
40cc4e28
2338188f
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
53 additions
and
3 deletions
+53
-3
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-0
lm_eval/tasks/race.py
lm_eval/tasks/race.py
+2
-2
lm_eval/tasks/squad.py
lm_eval/tasks/squad.py
+44
-0
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+5
-1
No files found.
lm_eval/tasks/__init__.py
View file @
9017bb36
...
...
@@ -5,6 +5,7 @@ from . import race
from
.
import
webqs
from
.
import
anli
from
.
import
quac
from
.
import
squad
TASK_REGISTRY
=
{
# GLUE
...
...
@@ -28,6 +29,7 @@ TASK_REGISTRY = {
"arc_easy"
:
arc
.
ARCEasy
,
"arc_challenge"
:
arc
.
ARCChallenge
,
"quac"
:
quac
.
QuAC
,
"squad"
:
squad
.
SQuAD
,
"race"
:
race
.
RACE
,
"webqs"
:
webqs
.
WebQs
,
"anli_r1"
:
anli
.
ANLIRound1
,
...
...
lm_eval/tasks/race.py
View file @
9017bb36
from
.
common
import
HFTask
from
..utils_stream
import
X
,
each
,
apply
,
join
,
filt
,
one
import
collections
import
nlp
import
datasets
class
RACE
(
HFTask
):
...
...
@@ -26,7 +26,7 @@ class RACE(HFTask):
# is shown that one document is made per passage.
r
=
collections
.
defaultdict
(
list
)
for
item
in
nlp
.
load_dataset
(
path
=
self
.
DATASET_PATH
,
name
=
self
.
DATASET_NAME
)[
set
]:
for
item
in
datasets
.
load_dataset
(
path
=
self
.
DATASET_PATH
,
name
=
self
.
DATASET_NAME
)[
set
]:
r
[
item
[
'article'
]].
append
(
item
)
res
=
list
(
r
.
values
()
>>
each
(
lambda
x
:
{
...
...
lm_eval/tasks/squad.py
0 → 100644
View file @
9017bb36
import
numpy
as
np
from
scipy.stats
import
pearsonr
,
spearmanr
from
sklearn.metrics
import
f1_score
,
matthews_corrcoef
from
tqdm
import
auto
as
tqdm_lib
from
.
common
import
HFTask
,
simple_accuracy_metric
,
yesno
class SQuAD(HFTask):
    """Question-answering task built on the ``squad_v2`` dataset path.

    Documents are read from ``self.data``, which is presumably populated by
    the ``HFTask`` base class (not visible here -- confirm against
    ``common.py``). Only the "train" and "validation" splits are exposed.
    """

    # Dataset identifier and (absent) configuration name; how these are
    # consumed is decided by the base class.
    DATASET_PATH = "squad_v2"
    DATASET_NAME = None

    def has_training_docs(self):
        """Report that a training split is available."""
        return True

    def has_validation_docs(self):
        """Report that a validation split is available."""
        return True

    def has_test_docs(self):
        """Report that no test split is exposed by this task."""
        return False

    def training_docs(self):
        """Return ``self.data["train"]``, or ``None`` if training docs are disabled."""
        if not self.has_training_docs():
            return None
        return self.data["train"]

    def validation_docs(self):
        """Return ``self.data["validation"]``, or ``None`` if validation docs are disabled."""
        if not self.has_validation_docs():
            return None
        return self.data["validation"]

    def fewshot_description(self):
        """Return a template string illustrating the prompt layout used by ``doc_to_text``."""
        return (
            "Title: The_Title_of_It\n\n"
            "Background: A text passage as background to answer the question with.\n\n"
            "Q: Question about the passage.\n\n"
            "A: Answer."
        )

    def doc_to_text(self, doc, include_target=True):
        """Render one document as a Title / Background / Q / A prompt.

        Args:
            doc: Mapping with string fields ``'title'``, ``'context'`` and
                ``'question'``, plus ``doc['answers']['text']``, a sequence
                of answer strings.
            include_target: When true, append the first listed answer, or
                the literal ``'unanswerable'`` if the answer list is empty.

        Returns:
            The prompt text; it ends with ``'A: '`` when no target is appended.
        """
        prompt = "".join([
            'Title: ', doc['title'], '\n\n',
            'Background: ', doc['context'], '\n\n',
            'Q: ', doc['question'], '\n\n',
            'A: ',
        ])
        if not include_target:
            return prompt

        candidates = doc['answers']['text']
        # An empty answer list is rendered with a fixed placeholder string.
        target = candidates[0] if len(candidates) > 0 else 'unanswerable'
        return prompt + target

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        """Evaluation entry point; not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: Write evaluation function
        raise NotImplementedError()
\ No newline at end of file
lm_eval/tasks/superglue.py
View file @
9017bb36
...
...
@@ -94,7 +94,11 @@ class Copa(HFTask):
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
# Drop the period
text
=
doc
[
"premise"
].
strip
()[:
-
1
]
+
" because "
connector
=
{
"cause"
:
"because"
,
"effect"
:
"therefore"
,
}[
doc
[
"question"
]]
text
=
doc
[
"premise"
].
strip
()[:
-
1
]
+
f
"
{
connector
}
"
if
include_target
:
correct_choice
=
doc
[
"choice1"
]
if
doc
[
"label"
]
==
0
else
doc
[
"choice2"
]
# Connect the sentences
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment