Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b0db32bc
Unverified
Commit
b0db32bc
authored
Oct 23, 2020
by
Stella Biderman
Committed by
GitHub
Oct 23, 2020
Browse files
Merge pull request #50 from cfoster0/winograd
Winograd changes
parents
b8a3edaf
05bd05e9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
87 additions
and
2 deletions
+87
-2
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+3
-1
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+1
-1
lm_eval/tasks/wsc273.py
lm_eval/tasks/wsc273.py
+83
-0
No files found.
lm_eval/tasks/__init__.py
View file @
b0db32bc
...
...
@@ -4,6 +4,7 @@ from . import arc
from
.
import
race
from
.
import
webqs
from
.
import
anli
from
.
import
wsc273
from
.
import
winogrande
from
.
import
quac
from
.
import
hellaswag
...
...
@@ -27,7 +28,7 @@ TASK_REGISTRY = {
"copa"
:
superglue
.
Copa
,
"multirc"
:
superglue
.
MultiRC
,
"wic"
:
superglue
.
WordsInContext
,
"wsc"
:
superglue
.
WinogradSchemaChallenge
,
"wsc"
:
superglue
.
SG
WinogradSchemaChallenge
,
# Order by benchmark/genre?
"arc_easy"
:
arc
.
ARCEasy
,
"arc_challenge"
:
arc
.
ARCChallenge
,
...
...
@@ -37,6 +38,7 @@ TASK_REGISTRY = {
"squad"
:
squad
.
SQuAD
,
"race"
:
race
.
RACE
,
"webqs"
:
webqs
.
WebQs
,
"wsc273"
:
wsc273
.
WinogradSchemaChallenge273
,
"winogrande"
:
winogrande
.
Winogrande
,
"anli_r1"
:
anli
.
ANLIRound1
,
"anli_r2"
:
anli
.
ANLIRound2
,
...
...
lm_eval/tasks/superglue.py
View file @
b0db32bc
...
...
@@ -218,7 +218,7 @@ class WordsInContext(HFTask):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
class
WinogradSchemaChallenge
(
HFTask
):
class
SG
WinogradSchemaChallenge
(
HFTask
):
DATASET_PATH
=
"super_glue"
DATASET_NAME
=
"wsc"
...
...
lm_eval/tasks/wsc273.py
0 → 100644
View file @
b0db32bc
import
json
import
random
import
os
from
lm_eval.base
import
Dataset
from
..utils
import
sh
class WinogradSchemaChallenge273(Dataset):
    """Test-only task over the WSC273 Winograd schema data.

    The data is a JSON list in which consecutive entries form a pair: the
    same ``question_id`` with two candidate substitutions, one of them
    marked correct. Each pair becomes one document of the form
    ``{'id': ..., 'completions': {'T': <correct>, 'F': <incorrect>}}``.
    """

    # Location of the downloaded data file; shared by download() and
    # test_docs() so both always agree on the path.
    _DATA_FILE = 'data/wsc273/wsc273.json'

    def __init__(self):
        super().__init__()

    def download(self):
        """Fetch the WSC273 JSON file unless it is already on disk."""
        # Check for the file itself rather than its directory, so that a
        # directory left behind by an interrupted download does not prevent
        # a retry.
        if not os.path.exists(self._DATA_FILE):
            sh("""
                mkdir -p data/wsc273
                wget https://git.cse.msu.edu/bakerb15/nlp-final-project/raw/master/Winogard/reproduce/commonsense_test/wsc273.json -O data/wsc273/wsc273.json
                """)

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def has_test_docs(self):
        return True

    def training_docs(self):
        return []

    def validation_docs(self):
        return []

    def test_docs(self):
        """Load the downloaded JSON file and return the paired documents."""
        # Context manager closes the handle deterministically; JSON text is
        # read as UTF-8 instead of the locale's default encoding.
        with open(self._DATA_FILE, encoding='utf-8') as f:
            myjson = json.load(f)
        return self.load_doc(myjson)

    def fewshot_description(self):
        # This format is ONLY for the purposes of deduplication. For the task evaluation, we'll need to find a new strategy,
        # to meet the needs of this particular task.
        return "Winograd schema sentence with correct continuation. True. Winograd schema sentence with incorrect continuation. False."

    def load_doc(self, myjson):
        """Group consecutive entries of ``myjson`` into paired documents.

        Args:
            myjson: sequence of dicts, each with ``'question_id'``,
                ``'correctness'`` and ``'substitution'`` keys. Entries
                ``2k`` and ``2k + 1`` must describe the same question.

        Returns:
            A list with one dict per pair:
            ``{'id': question_id, 'completions': {'T': ..., 'F': ...}}``.

        Raises:
            ValueError: if the input has an odd number of entries, if the
                two entries of a pair have different ``question_id``
                values, or if a pair does not contain exactly one correct
                completion.
        """
        # Use the real input length instead of a hard-coded 273 * 2 so a
        # truncated file is reported clearly and extra entries are not
        # silently ignored.
        if len(myjson) % 2 != 0:
            raise ValueError("WSC273 has missing completion pair.")

        docs = []
        for i in range(0, len(myjson), 2):
            item1 = myjson[i]
            item2 = myjson[i + 1]

            if item1['question_id'] != item2['question_id']:
                raise ValueError("WSC273 has missing completion pair.")

            # Equality (not identity or truthiness) is kept on purpose so the
            # same values count as "correct" as before.
            first_correct = item1['correctness'] == True  # noqa: E712
            second_correct = item2['correctness'] == True  # noqa: E712

            # Without this check a pair with no correct item would reuse the
            # previous pair's document (or hit an unbound name on the first
            # pair), and a pair with two correct items would be ambiguous.
            if first_correct == second_correct:
                raise ValueError(
                    "WSC273 pair must have exactly one correct completion."
                )

            if first_correct:
                correct, incorrect = item1, item2
            else:
                correct, incorrect = item2, item1

            docs.append({
                'id': item1['question_id'],
                'completions': {
                    'T': correct['substitution'],
                    'F': incorrect['substitution'],
                },
            })

        return docs

    def doc_to_text(self, doc, include_target=True):
        """Render a document as its correct and incorrect completions.

        ``include_target`` is accepted but not used here.
        """
        # WSC273 is currently only writing out full examples. Partial evaluation needs implementing.
        text = doc['completions']['T'] + ' True. ' + doc['completions']['F'] + ' False.'
        return text

    def evaluate(self, docs, lm):
        # TODO: Write evaluation function
        raise NotImplementedError()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment