Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
4e94af6f
".github/vscode:/vscode.git/clone" did not exist on "aee0ef52f52c279c0195427721bcf6b232dc1b1a"
Unverified
Commit
4e94af6f
authored
May 24, 2023
by
Lintang Sutawika
Committed by
GitHub
May 24, 2023
Browse files
Merge pull request #522 from EleutherAI/fix-mgpt-fewshot
parents
84ef60ee
25699d3e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
152 additions
and
0 deletions
+152
-0
lm_eval/tasks/pawsx.py
lm_eval/tasks/pawsx.py
+76
-0
lm_eval/tasks/xnli.py
lm_eval/tasks/xnli.py
+76
-0
No files found.
lm_eval/tasks/pawsx.py
View file @
4e94af6f
...
...
@@ -26,6 +26,7 @@ Homepage: https://github.com/google-research-datasets/paws/tree/master/pawsx
"""
from
lm_eval.base
import
Task
,
rf
from
lm_eval.metrics
import
mean
from
lm_eval
import
utils
_CITATION
=
"""
@inproceedings{yang-etal-2019-paws,
...
...
@@ -85,6 +86,11 @@ class PAWSXBase(Task):
def
doc_to_target
(
self
,
doc
):
return
" "
+
[
self
.
YES
,
self
.
NO
][
doc
[
"label"
]]
def
doc_to_fewshot_prompt
(
self
,
doc
):
prompt
=
self
.
doc_to_text
(
doc
)
return
prompt
.
replace
(
"[MASK]"
,
self
.
doc_to_target
(
doc
)[
1
:])
def
construct_requests
(
self
,
doc
,
ctx
):
"""Uses RequestFactory to construct Requests and returns an iterable of
Requests which will be sent to the LM.
...
...
@@ -136,6 +142,76 @@ class PAWSXBase(Task):
def
higher_is_better
(
self
):
return
{
"acc"
:
True
}
@
utils
.
positional_deprecated
def
fewshot_context
(
self
,
doc
,
num_fewshot
,
provide_description
=
None
,
rnd
=
None
,
description
=
None
):
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
:param doc: str
The document as returned from training_docs, validation_docs, or test_docs.
:param num_fewshot: int
The number of fewshot examples to provide in the returned context string.
:param provide_description: bool
Not implemented, and this option is deprecated and will be removed in a future version in favor of a different description providing method
:param rnd: random.Random
The pseudo-random number generator used to randomly sample examples.
WARNING: This is currently a required arg although it's optionalized with a default `None`.
:param description: str
The task's description that will be prepended to the fewshot examples.
:returns: str
The fewshot context.
"""
assert
(
rnd
is
not
None
),
"A `random.Random` generator argument must be provided to `rnd`"
assert
not
provide_description
,
(
"The `provide_description` arg will be removed in future versions. To prepend "
"a custom description to the context, supply the corresponding string via the "
"`description` arg."
)
if
provide_description
is
not
None
:
# nudge people to not specify it at all
print
(
"WARNING: provide_description is deprecated and will be removed in a future version in favor of description_dict"
)
description
=
description
+
"
\n\n
"
if
description
else
""
if
num_fewshot
==
0
:
labeled_examples
=
""
else
:
# for sets with no training docs, draw from other set *but ensure no overlap with current doc*
if
self
.
has_training_docs
():
fewshotex
=
self
.
fewshot_examples
(
k
=
num_fewshot
,
rnd
=
rnd
)
else
:
if
self
.
_fewshot_docs
is
None
:
self
.
_fewshot_docs
=
list
(
self
.
validation_docs
()
if
self
.
has_validation_docs
()
else
self
.
test_docs
()
)
fewshotex
=
rnd
.
sample
(
self
.
_fewshot_docs
,
num_fewshot
+
1
)
# get rid of the doc that's the one we're evaluating, if it's in the fewshot
fewshotex
=
[
x
for
x
in
fewshotex
if
x
!=
doc
][:
num_fewshot
]
labeled_examples
=
(
"
\n\n
"
.
join
(
[
# self.doc_to_text(doc) + self.doc_to_target(doc)
self
.
doc_to_fewshot_prompt
(
doc
)
for
doc
in
fewshotex
]
)
+
"
\n\n
"
)
example
=
self
.
doc_to_text
(
doc
)
return
description
+
labeled_examples
+
example
class
PAWSX_en
(
PAWSXBase
):
DATASET_NAME
=
"en"
...
...
lm_eval/tasks/xnli.py
View file @
4e94af6f
...
...
@@ -18,6 +18,7 @@ Homepage: https://github.com/facebookresearch/XNLI
import
numpy
as
np
from
lm_eval.base
import
rf
,
Task
from
lm_eval.metrics
import
mean
from
lm_eval
import
utils
_CITATIONS
=
"""
@InProceedings{conneau2018xnli,
...
...
@@ -89,6 +90,11 @@ class XNLIBase(Task):
]
)
def
doc_to_fewshot_prompt
(
self
,
doc
):
prompt
=
self
.
doc_to_text
(
doc
)
return
prompt
.
replace
(
"[MASK]"
,
self
.
doc_to_target
(
doc
)[
1
:])
def
construct_requests
(
self
,
doc
,
ctx
):
"""Uses RequestFactory to construct Requests and returns an iterable of
Requests which will be sent to the LM.
...
...
@@ -138,6 +144,76 @@ class XNLIBase(Task):
"""
return
{
"acc"
:
True
}
@
utils
.
positional_deprecated
def
fewshot_context
(
self
,
doc
,
num_fewshot
,
provide_description
=
None
,
rnd
=
None
,
description
=
None
):
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
:param doc: str
The document as returned from training_docs, validation_docs, or test_docs.
:param num_fewshot: int
The number of fewshot examples to provide in the returned context string.
:param provide_description: bool
Not implemented, and this option is deprecated and will be removed in a future version in favor of a different description providing method
:param rnd: random.Random
The pseudo-random number generator used to randomly sample examples.
WARNING: This is currently a required arg although it's optionalized with a default `None`.
:param description: str
The task's description that will be prepended to the fewshot examples.
:returns: str
The fewshot context.
"""
assert
(
rnd
is
not
None
),
"A `random.Random` generator argument must be provided to `rnd`"
assert
not
provide_description
,
(
"The `provide_description` arg will be removed in future versions. To prepend "
"a custom description to the context, supply the corresponding string via the "
"`description` arg."
)
if
provide_description
is
not
None
:
# nudge people to not specify it at all
print
(
"WARNING: provide_description is deprecated and will be removed in a future version in favor of description_dict"
)
description
=
description
+
"
\n\n
"
if
description
else
""
if
num_fewshot
==
0
:
labeled_examples
=
""
else
:
# for sets with no training docs, draw from other set *but ensure no overlap with current doc*
if
self
.
has_training_docs
():
fewshotex
=
self
.
fewshot_examples
(
k
=
num_fewshot
,
rnd
=
rnd
)
else
:
if
self
.
_fewshot_docs
is
None
:
self
.
_fewshot_docs
=
list
(
self
.
validation_docs
()
if
self
.
has_validation_docs
()
else
self
.
test_docs
()
)
fewshotex
=
rnd
.
sample
(
self
.
_fewshot_docs
,
num_fewshot
+
1
)
# get rid of the doc that's the one we're evaluating, if it's in the fewshot
fewshotex
=
[
x
for
x
in
fewshotex
if
x
!=
doc
][:
num_fewshot
]
labeled_examples
=
(
"
\n\n
"
.
join
(
[
# self.doc_to_text(doc) + self.doc_to_target(doc)
self
.
doc_to_fewshot_prompt
(
doc
)
for
doc
in
fewshotex
]
)
+
"
\n\n
"
)
example
=
self
.
doc_to_text
(
doc
)
return
description
+
labeled_examples
+
example
class
XNLI_en
(
XNLIBase
):
# English
DATASET_NAME
=
"en"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment