Project: gaoqiong/lm-evaluation-harness — Commit a1a4a32e

Unverified commit, authored Feb 02, 2021 by Leo Gao, committed via GitHub on Feb 02, 2021.

Merge pull request #119 from jon-tow/task-refactor

Refactor `Dataset` naming and `HFTask` properties

Parents: 826d90e2, 5cfb7308
Changes: 13 changed files, with 46 additions and 41 deletions (+46 −41).

lm_eval/base.py              +19  -13
lm_eval/tasks/arithmetic.py   +2   -2
lm_eval/tasks/common.py       +4   -5
lm_eval/tasks/coqa.py         +2   -2
lm_eval/tasks/drop.py         +2   -2
lm_eval/tasks/lambada.py      +2   -2
lm_eval/tasks/naturalqs.py    +3   -3
lm_eval/tasks/piqa.py         +2   -2
lm_eval/tasks/quac.py         +2   -2
lm_eval/tasks/sat.py          +2   -2
lm_eval/tasks/storycloze.py   +2   -2
lm_eval/tasks/triviaqa.py     +2   -2
lm_eval/tasks/wsc273.py       +2   -2
lm_eval/base.py

@@ -58,10 +58,10 @@ class LM(abc.ABC):
         return cls()
 
 
-class Dataset(abc.ABC):
+class Task(abc.ABC):
     def __init__(self):
         self.download()
-        self._traindocs = None
+        self._training_docs = None
 
     def download(self):
         """Downloads the task dataset if necessary"""
@@ -71,7 +71,7 @@ class Dataset(abc.ABC):
     def has_training_docs(self):
         """Whether the task has a training set"""
         pass
 
     @abc.abstractmethod
     def has_validation_docs(self):
         """Whether the task has a validation set"""
@@ -84,23 +84,29 @@ class Dataset(abc.ABC):
     def training_docs(self):
         """
         :return: Iterable[obj]
             An iterable of any object that doc_to_text can handle
         """
         return []
 
     def validation_docs(self):
         """
         :return: Iterable[obj]
             An iterable of any object that doc_to_text can handle
         """
         return []
 
     def test_docs(self):
         """
         :return: Iterable[obj]
             An iterable of any object that doc_to_text can handle
         """
         return []
 
-    def fewshot_examples(self, k):
-        if self._traindocs is None:
-            self._traindocs = list(self.training_docs())
-        return random.sample(self._traindocs, k)
+    def fewshot_examples(self, k):
+        if self._training_docs is None:
+            self._training_docs = list(self.training_docs())
+        return random.sample(self._training_docs, k)
 
     @abc.abstractmethod
     def doc_to_text(self, doc):
@@ -123,7 +129,7 @@ class Dataset(abc.ABC):
         part of the document for `doc`.
         """
         pass
 
     @abc.abstractmethod
     def process_results(self, doc, results):
         """Take a single document and the LM results and evaluates, returning a
@@ -161,7 +167,7 @@ class Dataset(abc.ABC):
     def fewshot_context(self, doc, num_fewshot, provide_description):
         raw_description = self.fewshot_description()
         description = (raw_description + "\n===\n\n") if provide_description and raw_description else ""
 
         if num_fewshot == 0:
             labeled_examples = ""
         else:
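The practical effect of the `_traindocs` → `_training_docs` rename is a single, consistent name for the lazy cache that `fewshot_examples` builds from `training_docs()`. Below is a minimal, self-contained sketch of that caching pattern; it is condensed for illustration only, since the real `Task` is abstract and carries many more methods:

    import random

    class MiniTask:
        # Toy stand-in for lm_eval.base.Task, illustration only.
        def __init__(self):
            self._training_docs = None  # renamed from _traindocs in this commit

        def training_docs(self):
            # A real task yields dataset examples; toy data here.
            return ({"question": f"q{i}"} for i in range(10))

        def fewshot_examples(self, k):
            # First call materializes the docs into a list; later calls re-sample it.
            if self._training_docs is None:
                self._training_docs = list(self.training_docs())
            return random.sample(self._training_docs, k)

    task = MiniTask()
    print(task.fewshot_examples(3))  # three distinct docs, e.g. q7, q1, q4

Because the cache is built exactly once, repeated few-shot draws stay cheap even when `training_docs()` is an expensive generator.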
lm_eval/tasks/arithmetic.py

@@ -2,12 +2,12 @@ import abc
 import json
 import os
 from collections import namedtuple
-from lm_eval.base import Dataset, mean, rf
+from lm_eval.base import Task, mean, rf
 from best_download import download_file
 
 ArithmeticDoc = namedtuple('ArithmeticDoc', ['context', 'completion'])
 
-class Arithmetic(Dataset):
+class Arithmetic(Task):
     directory = 'data/arithmetic/'
 
     def __init__(self):
lm_eval/tasks/common.py

 import datasets
 import numpy as np
 import random
-from ..base import Dataset
+from ..base import Task
 
 
-class HFTask(Dataset):
+class HFTask(Task):
     DATASET_PATH = None
     DATASET_NAME = None
 
     def __init__(self):
         self.data = None
         super().__init__()
-        self._training_docs = None
 
     def download(self):
         self.data = datasets.load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)
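The deleted `self._training_docs = None` is now redundant: after the rename, `Task.__init__` initializes `_training_docs` itself, whereas the old base class initialized `_traindocs`, so `HFTask` had to set the longer name on its own. For illustration, a hypothetical subclass only needs to point the two class attributes at a Hugging Face dataset; the values below are made up for this sketch, and the remaining abstract methods are elided:

    from lm_eval.tasks.common import HFTask

    class BoolQ(HFTask):
        # Illustrative values, not part of this commit. download() forwards
        # them to datasets.load_dataset(path=DATASET_PATH, name=DATASET_NAME),
        # which stores a DatasetDict of splits on self.data.
        DATASET_PATH = "super_glue"
        DATASET_NAME = "boolq"
        # doc_to_text, process_results, etc. would still have to be
        # implemented before this class could be instantiated.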
lm_eval/tasks/coqa.py

@@ -2,11 +2,11 @@
 import json
 import random
-from lm_eval.base import Dataset
+from lm_eval.base import Task
 from ..utils import sh
 
 
-class CoQA(Dataset):
+class CoQA(Task):
     def __init__(self):
         self.download()
 
     def download(self):
lm_eval/tasks/drop.py

@@ -5,9 +5,9 @@ from sklearn.metrics import f1_score, matthews_corrcoef
 from tqdm import auto as tqdm_lib
 from .common import HFTask, simple_accuracy_metric, yesno
 from pathlib import Path
-from ..base import Dataset
+from ..base import Task
 
 
-class DROP(Dataset):
+class DROP(Task):
     DATAFOLDER = Path(__file__).parent / "../../data/drop"
 
     def __init__(self):
lm_eval/tasks/lambada.py
View file @
a1a4a32e
from
lm_eval.base
import
Dataset
,
rf
,
mean
from
lm_eval.base
import
Task
,
rf
,
mean
from
lm_eval.utils
import
sh
import
json
import
math
from
best_download
import
download_file
class
LAMBADA
(
Dataset
):
class
LAMBADA
(
Task
):
def
download
(
self
):
sh
(
"mkdir -p data/lambada"
)
download_file
(
...
...
lm_eval/tasks/naturalqs.py

@@ -30,10 +30,10 @@ class NaturalQs(HFTask):
     def fewshot_examples(self, k):
         # Data is too large to fit in memory. We just sample from the first bit.
-        if self._traindocs is None:
-            self._traindocs = list(islice(self.training_docs(), 0, 100000))
+        if self._training_docs is None:
+            self._training_docs = list(islice(self.training_docs(), 0, 100000))
 
-        return random.sample(self._traindocs, k)
+        return random.sample(self._training_docs, k)
 
     def doc_to_text(self, doc):
         return 'Q: ' + doc['question']['text'] + '\n\n' + 'A: '
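Only the cache name changes here, but the surrounding pattern is worth a note: `islice(self.training_docs(), 0, 100000)` caps how much of Natural Questions is ever materialized, so few-shot sampling touches at most the first 100,000 documents. A toy sketch of the same memory-bounding trick, with generated data standing in for the real corpus:

    import random
    from itertools import islice

    def docs():
        # Stand-in for a training set far too large to hold in memory.
        for i in range(10**9):
            yield {"question": f"q{i}"}

    cache = list(islice(docs(), 0, 100000))  # materialize only the first 100k
    print(random.sample(cache, 2))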
lm_eval/tasks/piqa.py

 import json
 import random
-from lm_eval.base import Dataset, rf, mean
+from lm_eval.base import Task, rf, mean
 from ..utils import sh
 import os
 
 
-class PiQA(Dataset):
+class PiQA(Task):
     def download(self):
         if not os.path.exists('data/piqa'):
             #TODO: use best_download
lm_eval/tasks/quac.py

 import json
 import random
 import os
-from lm_eval.base import Dataset
+from lm_eval.base import Task
 from ..utils import sh
 
 
-class QuAC(Dataset):
+class QuAC(Task):
     def __init__(self):
         super().__init__()
lm_eval/tasks/sat.py

 import json
 import random
 import os
-from lm_eval.base import Dataset, rf, mean
+from lm_eval.base import Task, rf, mean
 from tqdm import auto as tqdm_lib
 from .common import simple_accuracy_metric
 import numpy as np
 from ..utils import sh
 
 
-class SATAnalogies(Dataset):
+class SATAnalogies(Task):
     NEEDS_MANUAL_DL = True
 
     def __init__(self):
lm_eval/tasks/storycloze.py

 import json
 import random
-from lm_eval.base import Dataset
+from lm_eval.base import Task
 from ..utils import sh
 import csv
 
 
-class StoryCloze(Dataset):
+class StoryCloze(Task):
     NEEDS_MANUAL_DL = True
 
     def download(self):
lm_eval/tasks/triviaqa.py

 import os
 import json
 import random
-from lm_eval.base import Dataset, mean, rf
+from lm_eval.base import Task, mean, rf
 from ..utils import sh
 
 
-class TriviaQA(Dataset):
+class TriviaQA(Task):
     def download(self):
         if not os.path.exists('data/triviaqa'):
             sh("""
lm_eval/tasks/wsc273.py

 import json
 import random
 import os
-from lm_eval.base import Dataset
+from lm_eval.base import Task
 from ..utils import sh
 
 
-class WinogradSchemaChallenge273(Dataset):
+class WinogradSchemaChallenge273(Task):
     def __init__(self):
         super().__init__()