Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
d6b91191
Unverified
Commit
d6b91191
authored
Oct 05, 2020
by
Stella Biderman
Committed by
GitHub
Oct 05, 2020
Browse files
Merge pull request #35 from zphang/superglue2
MultiRC
parents
00038fea
bac0a528
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
0 deletions
+61
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+4
-0
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+57
-0
No files found.
lm_eval/tasks/__init__.py
View file @
d6b91191
...
@@ -5,6 +5,7 @@ from . import race
...
@@ -5,6 +5,7 @@ from . import race
from
.
import
webqs
from
.
import
webqs
TASK_REGISTRY
=
{
TASK_REGISTRY
=
{
# GLUE
"cola"
:
glue
.
CoLA
,
"cola"
:
glue
.
CoLA
,
"mnli"
:
glue
.
MNLI
,
"mnli"
:
glue
.
MNLI
,
"mrpc"
:
glue
.
MRPC
,
"mrpc"
:
glue
.
MRPC
,
...
@@ -14,11 +15,14 @@ TASK_REGISTRY = {
...
@@ -14,11 +15,14 @@ TASK_REGISTRY = {
"stsb"
:
glue
.
STSB
,
"stsb"
:
glue
.
STSB
,
"sst"
:
glue
.
SST
,
"sst"
:
glue
.
SST
,
"wnli"
:
glue
.
WNLI
,
"wnli"
:
glue
.
WNLI
,
# SuperGLUE
"boolq"
:
superglue
.
BoolQ
,
"boolq"
:
superglue
.
BoolQ
,
"commitmentbank"
:
superglue
.
CommitmentBank
,
"commitmentbank"
:
superglue
.
CommitmentBank
,
"copa"
:
superglue
.
Copa
,
"copa"
:
superglue
.
Copa
,
"multirc"
:
superglue
.
MultiRC
,
"wic"
:
superglue
.
WordsInContext
,
"wic"
:
superglue
.
WordsInContext
,
"wsc"
:
superglue
.
WinogradSchemaChallenge
,
"wsc"
:
superglue
.
WinogradSchemaChallenge
,
# Order by benchmark/genre?
"arc_easy"
:
arc
.
ARCEasy
,
"arc_easy"
:
arc
.
ARCEasy
,
"arc_challenge"
:
arc
.
ARCChallenge
,
"arc_challenge"
:
arc
.
ARCChallenge
,
"race"
:
race
.
RACE
,
"race"
:
race
.
RACE
,
...
...
lm_eval/tasks/superglue.py
View file @
d6b91191
...
@@ -120,6 +120,63 @@ class Copa(HFNLPTask):
...
@@ -120,6 +120,63 @@ class Copa(HFNLPTask):
return
choice
[
0
].
lower
()
+
choice
[
1
:]
return
choice
[
0
].
lower
()
+
choice
[
1
:]
class
MultiRC
(
HFNLPTask
):
NLP_PATH
=
"super_glue"
NLP_NAME
=
"multirc"
def
has_training_docs
(
self
):
return
True
def
has_validation_docs
(
self
):
return
True
def
has_test_docs
(
self
):
return
True
def
fewshot_description
(
self
):
return
"READING COMPREHENSION ANSWER KEY"
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
return
f
"
{
doc
[
'paragraph'
]
}
\n\n
{
doc
[
'question'
]
}
\n
"
\
+
(
self
.
format_answer
(
answer
=
doc
[
"answer"
],
label
=
doc
[
"label"
])
if
include_target
else
""
)
@
staticmethod
def
format_answer
(
answer
,
label
):
label_str
=
"True"
if
label
else
"False"
return
f
"[
{
label_str
}
]
{
answer
}
"
def
evaluate
(
self
,
docs
,
lm
,
provide_description
,
num_fewshot
):
preds
=
[]
for
doc
in
docs
:
ctx
=
self
.
fewshot_context
(
doc
=
doc
,
provide_description
=
provide_description
,
num_fewshot
=
num_fewshot
,
)
true_choice
=
self
.
format_answer
(
answer
=
doc
[
"answer"
],
label
=
True
)
false_choice
=
self
.
format_answer
(
answer
=
doc
[
"answer"
],
label
=
False
)
preds
.
append
(
lm
.
loglikelihood
(
ctx
,
f
'
{
true_choice
}
'
)
>
lm
.
loglikelihood
(
ctx
,
f
'
{
false_choice
}
'
)
)
# Only count as correct if all answers are labeled correctly for each question
question_scoring_dict
=
{}
for
doc
,
pred
in
zip
(
docs
,
preds
):
question_id
=
doc
[
"idx"
][
"question"
]
if
question_id
not
in
question_scoring_dict
:
question_scoring_dict
[
question_id
]
=
[]
gold_label
=
doc
[
"label"
]
==
1
question_scoring_dict
[
question_id
].
append
(
gold_label
==
pred
)
acc
=
np
.
mean
([
int
(
all
(
x
))
for
x
in
question_scoring_dict
.
values
()])
return
{
"major"
:
acc
,
"minor"
:
{
"acc"
:
acc
},
"higher_is_better"
:
True
,
}
class
WordsInContext
(
HFNLPTask
):
class
WordsInContext
(
HFNLPTask
):
NLP_PATH
=
"super_glue"
NLP_PATH
=
"super_glue"
NLP_NAME
=
"wic"
NLP_NAME
=
"wic"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment