Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
5cfb7308
Commit
5cfb7308
authored
Feb 02, 2021
by
Jonathan Tow
Browse files
Fix naming convention to avoid `pytest` name mangling invocation
parent
a60ef6fa
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
23 additions
and
23 deletions
+23
-23
lm_eval/base.py
lm_eval/base.py
+4
-4
lm_eval/tasks/anli.py
lm_eval/tasks/anli.py
+3
-3
lm_eval/tasks/common.py
lm_eval/tasks/common.py
+3
-3
lm_eval/tasks/naturalqs.py
lm_eval/tasks/naturalqs.py
+3
-3
lm_eval/tasks/openbookqa.py
lm_eval/tasks/openbookqa.py
+3
-3
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+7
-7
No files found.
lm_eval/base.py
View file @
5cfb7308
...
...
@@ -61,7 +61,7 @@ class LM(abc.ABC):
class
Task
(
abc
.
ABC
):
def
__init__
(
self
):
self
.
download
()
self
.
_
_
training_docs
=
None
self
.
_training_docs
=
None
def
download
(
self
):
"""Downloads the task dataset if necessary"""
...
...
@@ -104,9 +104,9 @@ class Task(abc.ABC):
return
[]
def
fewshot_examples
(
self
,
k
):
if
self
.
_
_
training_docs
is
None
:
self
.
_
_
training_docs
=
list
(
self
.
training_docs
())
return
random
.
sample
(
self
.
_
_
training_docs
,
k
)
if
self
.
_training_docs
is
None
:
self
.
_training_docs
=
list
(
self
.
training_docs
())
return
random
.
sample
(
self
.
_training_docs
,
k
)
@
abc
.
abstractmethod
def
doc_to_text
(
self
,
doc
):
...
...
lm_eval/tasks/anli.py
View file @
5cfb7308
...
...
@@ -18,9 +18,9 @@ class ANLIBase(HFTask):
def
training_docs
(
self
):
if
self
.
has_training_docs
():
if
self
.
_
_
training_docs
is
None
:
self
.
_
_
training_docs
=
list
(
self
.
data
[
"train_r"
+
str
(
self
.
SPLIT
)])
return
self
.
_
_
training_docs
if
self
.
_training_docs
is
None
:
self
.
_training_docs
=
list
(
self
.
data
[
"train_r"
+
str
(
self
.
SPLIT
)])
return
self
.
_training_docs
def
validation_docs
(
self
):
if
self
.
has_validation_docs
():
...
...
lm_eval/tasks/common.py
View file @
5cfb7308
...
...
@@ -30,9 +30,9 @@ class HFTask(Task):
# Cache training for faster few-shot.
# If data is too large to fit in memory, override this method.
if
self
.
has_training_docs
():
if
self
.
_
_
training_docs
is
None
:
self
.
_
_
training_docs
=
list
(
self
.
data
[
"train"
])
return
self
.
_
_
training_docs
if
self
.
_training_docs
is
None
:
self
.
_training_docs
=
list
(
self
.
data
[
"train"
])
return
self
.
_training_docs
def
validation_docs
(
self
):
if
self
.
has_validation_docs
():
...
...
lm_eval/tasks/naturalqs.py
View file @
5cfb7308
...
...
@@ -30,10 +30,10 @@ class NaturalQs(HFTask):
def
fewshot_examples
(
self
,
k
):
# Data is too large to fit in memory. We just sample from the first bit.
if
self
.
_
_
training_docs
is
None
:
self
.
_
_
training_docs
=
list
(
islice
(
self
.
training_docs
(),
0
,
100000
))
if
self
.
_training_docs
is
None
:
self
.
_training_docs
=
list
(
islice
(
self
.
training_docs
(),
0
,
100000
))
return
random
.
sample
(
self
.
_
_
training_docs
,
k
)
return
random
.
sample
(
self
.
_training_docs
,
k
)
def
doc_to_text
(
self
,
doc
):
return
'Q: '
+
doc
[
'question'
][
'text'
]
+
'
\n\n
'
+
'A: '
...
...
lm_eval/tasks/openbookqa.py
View file @
5cfb7308
...
...
@@ -19,9 +19,9 @@ class OpenBookQA(HFTask):
def
training_docs
(
self
):
if
self
.
has_training_docs
():
if
self
.
_
_
training_docs
is
None
:
self
.
_
_
training_docs
=
list
(
self
.
data
[
"train"
])
return
self
.
_
_
training_docs
if
self
.
_training_docs
is
None
:
self
.
_training_docs
=
list
(
self
.
data
[
"train"
])
return
self
.
_training_docs
def
validation_docs
(
self
):
if
self
.
has_validation_docs
():
...
...
lm_eval/tasks/superglue.py
View file @
5cfb7308
...
...
@@ -273,17 +273,17 @@ class ReCoRD(HFTask):
# Hence, we one "doc" for each (context + passage, answer) pair.
# Moreover, we only use the correct answers for context packing
# (This is not an issue for evaluation, where we can directly score multiple candidates at once).
if
self
.
_
_
training_docs
is
None
:
self
.
_
_
training_docs
=
[]
if
self
.
_training_docs
is
None
:
self
.
_training_docs
=
[]
for
doc
in
self
.
data
[
"train"
]:
for
entity
in
list
(
set
(
doc
[
"entities"
])):
self
.
_
_
training_docs
.
append
({
self
.
_training_docs
.
append
({
"passage"
:
doc
[
"passage"
],
"query"
:
doc
[
"query"
],
"entity"
:
entity
,
"label"
:
entity
in
doc
[
"answers"
],
})
return
self
.
_
_
training_docs
return
self
.
_training_docs
def
validation_docs
(
self
):
for
doc
in
self
.
data
[
"validation"
]:
...
...
@@ -417,14 +417,14 @@ class SGWinogradSchemaChallenge(HFTask):
def
training_docs
(
self
):
if
self
.
has_training_docs
():
if
self
.
_
_
training_docs
is
None
:
if
self
.
_training_docs
is
None
:
# GPT-3 Paper's format only uses positive examples for fewshot "training"
self
.
_
_
training_docs
=
[
self
.
_training_docs
=
[
doc
for
doc
in
self
.
data
[
"train"
]
if
doc
[
"label"
]
]
return
self
.
_
_
training_docs
return
self
.
_training_docs
def
fewshot_description
(
self
):
return
"Final Exam with Answer Key
\n
"
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment