Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
f348fa2c
Unverified
Commit
f348fa2c
authored
Oct 05, 2020
by
Leo Gao
Committed by
GitHub
Oct 05, 2020
Browse files
Merge pull request #39 from EleutherAI/add_lambada
add_lambada
parents
43978e3b
f161731c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
82 additions
and
38 deletions
+82
-38
lm_eval/tasks/common.py
lm_eval/tasks/common.py
+4
-4
lm_eval/tasks/glue.py
lm_eval/tasks/glue.py
+33
-34
lm_eval/tasks/lambada.py
lm_eval/tasks/lambada.py
+45
-0
No files found.
lm_eval/tasks/common.py
View file @
f348fa2c
...
...
@@ -4,14 +4,14 @@ import random
from
..base
import
Dataset
class
HF
NLP
Task
(
Dataset
):
NLP
_PATH
=
None
NLP
_NAME
=
None
class
HFTask
(
Dataset
):
DATASET
_PATH
=
None
DATASET
_NAME
=
None
def
__init__
(
self
):
super
().
__init__
()
self
.
_training_docs
=
None
self
.
data
=
datasets
.
load_dataset
(
path
=
self
.
NLP
_PATH
,
name
=
self
.
NLP
_NAME
)
self
.
data
=
datasets
.
load_dataset
(
path
=
self
.
DATASET
_PATH
,
name
=
self
.
DATASET
_NAME
)
def
has_training_docs
(
self
):
"""Whether the task has a training set"""
...
...
lm_eval/tasks/glue.py
View file @
f348fa2c
...
...
@@ -2,8 +2,7 @@ import numpy as np
from
scipy.stats
import
pearsonr
,
spearmanr
from
sklearn.metrics
import
f1_score
,
matthews_corrcoef
from
tqdm
import
auto
as
tqdm_lib
from
.
common
import
HFNLPTask
,
simple_accuracy_metric
,
yesno
from
.
common
import
HFTask
,
simple_accuracy_metric
,
yesno
def
get_accuracy_and_f1
(
preds
,
golds
):
golds
=
np
.
array
(
golds
)
...
...
@@ -22,10 +21,10 @@ def get_accuracy_and_f1(preds, golds):
}
class
CoLA
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"cola"
class
CoLA
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"cola"
def
has_training_docs
(
self
):
return
True
...
...
@@ -64,9 +63,9 @@ class CoLA(HFNLPTask):
}
class
MNLI
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"mnli"
class
MNLI
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"mnli"
def
has_training_docs
(
self
):
return
True
...
...
@@ -79,11 +78,11 @@ class MNLI(HFNLPTask):
def
validation_docs
(
self
):
if
self
.
has_validation_docs
():
return
self
.
_load_nlp_dataset
()
[
"validation_matched"
]
return
self
.
data
[
"validation_matched"
]
def
test_docs
(
self
):
if
self
.
has_test_docs
():
return
self
.
_load_nlp_dataset
()
[
"test_matched"
]
return
self
.
data
[
"test_matched"
]
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
text
=
"{}
\n
question:
\t
{}
\t
True, False or Neither?
\n
answer:"
.
format
(
...
...
@@ -115,9 +114,9 @@ class MNLI(HFNLPTask):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
class
MRPC
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"mrpc"
class
MRPC
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"mrpc"
def
has_training_docs
(
self
):
return
True
...
...
@@ -152,10 +151,10 @@ class MRPC(HFNLPTask):
preds
.
append
(
lm
.
loglikelihood
(
ctx
,
'yes'
)
>
lm
.
loglikelihood
(
ctx
,
'no'
))
return
get_accuracy_and_f1
(
preds
=
preds
,
golds
=
golds
)
class
RTE
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"rte"
class
RTE
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"rte"
def
has_training_docs
(
self
):
return
True
...
...
@@ -190,9 +189,9 @@ class RTE(HFNLPTask):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
class
QNLI
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"qnli"
class
QNLI
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"qnli"
def
has_training_docs
(
self
):
return
True
...
...
@@ -227,9 +226,9 @@ class QNLI(HFNLPTask):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
class
QQP
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"qqp"
class
QQP
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"qqp"
def
has_training_docs
(
self
):
return
True
...
...
@@ -265,9 +264,9 @@ class QQP(HFNLPTask):
return
get_accuracy_and_f1
(
preds
=
preds
,
golds
=
golds
)
class
STSB
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"stsb"
class
STSB
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"stsb"
def
has_training_docs
(
self
):
return
True
...
...
@@ -322,9 +321,9 @@ class STSB(HFNLPTask):
}
class
SST
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"sst2"
class
SST
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"sst2"
def
has_training_docs
(
self
):
return
True
...
...
@@ -359,10 +358,10 @@ class SST(HFNLPTask):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
class
WNLI
(
HF
NLP
Task
):
NLP
_PATH
=
"glue"
NLP
_NAME
=
"wnli"
class
WNLI
(
HFTask
):
DATASET
_PATH
=
"glue"
DATASET
_NAME
=
"wnli"
def
has_training_docs
(
self
):
return
True
...
...
lm_eval/tasks/lambada.py
0 → 100644
View file @
f348fa2c
from
lm_eval.base
import
Dataset
from
lm_eval.utils
import
sh
import
json
import
requests
import
ftfy
class Lambada(Dataset):
    """LAMBADA test set, fetched from the GPT-2 storage bucket.

    ``download`` retrieves the JSON-lines file, repairs each text with
    ``ftfy`` and caches the result as a JSON list of strings. Only a test
    split is exposed; prompt formatting and evaluation are still stubs.
    """

    def download(self):
        """Fetch the test set and cache it at data/lambada/lambada_test.json.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        sh("mkdir -p data/lambada")

        # Fetch and parse before opening the cache file: opening with 'w'
        # truncates it, so doing that first would leave an empty file behind
        # whenever the request or the parsing fails.
        req = requests.get("https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl")
        req.raise_for_status()

        # Skip blank lines; json.loads would raise on them.
        jsons = [json.loads(l) for l in req.iter_lines() if l]
        texts = [ftfy.fix_text(j['text'], normalization='NFKC') for j in jsons]

        with open("data/lambada/lambada_test.json", 'w') as f:
            json.dump(texts, f)

    def has_training_docs(self):
        """Return False: this task provides no training split."""
        return False

    def has_validation_docs(self):
        """Return False: this task provides no validation split."""
        return False

    def has_test_docs(self):
        """Return True: the test split is the only one available."""
        return True

    def training_docs(self):
        """No training split; returns None."""
        pass

    def validation_docs(self):
        """No validation split; returns None."""
        pass

    def load_doc(self, myjson):
        """Return the text of every entry in ``myjson`` as a list.

        ``download`` stores a flat list of strings, so plain-string entries
        are passed through unchanged. Dict entries are still accepted and
        contribute their 'text' value, matching the previous behaviour.
        """
        return [doc['text'] if isinstance(doc, dict) else doc for doc in myjson]

    def test_docs(self):
        """Load the cached test set and return its documents as a list of texts."""
        # Context manager so the file handle is closed once parsing is done.
        with open("data/lambada/lambada_test.json") as f:
            myjson = json.load(f)
        return self.load_doc(myjson)

    def doc_to_text(self, doc, include_target=True):
        """Not implemented yet; returns None."""
        pass

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        """Not implemented yet; returns None."""
        pass
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment