Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
48c6bd65
Commit
48c6bd65
authored
May 13, 2023
by
Oleh Shliazhko
Browse files
fix mmlu task, set updated dataset name and make the prompt identical to the original eval code
parent
d1451679
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
14 deletions
+17
-14
lm_eval/tasks/hendrycks_test.py
lm_eval/tasks/hendrycks_test.py
+17
-14
No files found.
lm_eval/tasks/hendrycks_test.py
View file @
48c6bd65
...
@@ -14,7 +14,6 @@ Homepage: https://github.com/hendrycks/test
...
@@ -14,7 +14,6 @@ Homepage: https://github.com/hendrycks/test
"""
"""
from
lm_eval.base
import
MultipleChoiceTask
from
lm_eval.base
import
MultipleChoiceTask
_CITATION
=
"""
_CITATION
=
"""
@article{hendryckstest2021,
@article{hendryckstest2021,
title={Measuring Massive Multitask Language Understanding},
title={Measuring Massive Multitask Language Understanding},
...
@@ -104,7 +103,7 @@ def create_task(subject):
...
@@ -104,7 +103,7 @@ def create_task(subject):
class
GeneralHendrycksTest
(
MultipleChoiceTask
):
class
GeneralHendrycksTest
(
MultipleChoiceTask
):
VERSION
=
0
VERSION
=
0
DATASET_PATH
=
"
hendrycks_test
"
DATASET_PATH
=
"
cais/mmlu
"
DATASET_NAME
=
None
DATASET_NAME
=
None
def
__init__
(
self
,
subject
):
def
__init__
(
self
,
subject
):
...
@@ -112,7 +111,7 @@ class GeneralHendrycksTest(MultipleChoiceTask):
...
@@ -112,7 +111,7 @@ class GeneralHendrycksTest(MultipleChoiceTask):
super
().
__init__
()
super
().
__init__
()
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
Fals
e
return
Tru
e
def
has_validation_docs
(
self
):
def
has_validation_docs
(
self
):
return
True
return
True
...
@@ -126,41 +125,45 @@ class GeneralHendrycksTest(MultipleChoiceTask):
...
@@ -126,41 +125,45 @@ class GeneralHendrycksTest(MultipleChoiceTask):
def
test_docs
(
self
):
def
test_docs
(
self
):
return
map
(
self
.
_process_doc
,
self
.
dataset
[
"test"
])
return
map
(
self
.
_process_doc
,
self
.
dataset
[
"test"
])
def
fewshot_context
(
self
,
doc
,
num_fewshot
,
**
kwargs
):
subject
=
self
.
DATASET_NAME
description
=
f
"The following are multiple choice questions (with answers) about
{
subject
}
."
kwargs
[
"description"
]
=
description
return
super
().
fewshot_context
(
doc
=
doc
,
num_fewshot
=
num_fewshot
,
**
kwargs
)
def
_process_doc
(
self
,
doc
):
def
_process_doc
(
self
,
doc
):
def
format_example
(
doc
,
keys
):
def
format_example
(
doc
,
keys
):
"""
"""
Question: <prompt>
<prompt>
Choices:
A. <choice1>
A. <choice1>
B. <choice2>
B. <choice2>
C. <choice3>
C. <choice3>
D. <choice4>
D. <choice4>
Answer:
Answer:
"""
"""
prompt
=
"Question: "
+
doc
[
"question"
]
+
"
\n
Choices:
\n
"
prompt
+=
""
.
join
(
question
=
doc
[
"question"
]
choices
=
""
.
join
(
[
f
"
{
key
}
.
{
choice
}
\n
"
for
key
,
choice
in
zip
(
keys
,
doc
[
"choices"
])]
[
f
"
{
key
}
.
{
choice
}
\n
"
for
key
,
choice
in
zip
(
keys
,
doc
[
"choices"
])]
)
)
prompt
+
=
"
Answer:"
prompt
=
f
"
{
question
}
\n
{
choices
}
Answer:"
return
prompt
return
prompt
keys
=
[
"A"
,
"B"
,
"C"
,
"D"
]
keys
=
[
"A"
,
"B"
,
"C"
,
"D"
]
return
{
return
{
"query"
:
format_example
(
doc
,
keys
),
"query"
:
format_example
(
doc
,
keys
),
"choices"
:
doc
[
"choices"
],
"choices"
:
keys
,
"gold"
:
keys
.
index
(
doc
[
"answer"
])
"gold"
:
doc
[
"answer"
],
if
isinstance
(
doc
[
"answer"
],
str
)
else
doc
[
"answer"
],
}
}
return
result
def
fewshot_examples
(
self
,
k
,
rnd
):
def
fewshot_examples
(
self
,
k
,
rnd
):
# fewshot_examples is not just sampling from train_docs because dev is
# fewshot_examples is not just sampling from train_docs because dev is
# in the same distribution as val/test but auxiliary_train isn't
# in the same distribution as val/test but auxiliary_train isn't
if
self
.
_fewshot_docs
is
None
:
if
self
.
_fewshot_docs
is
None
:
self
.
_fewshot_docs
=
list
(
map
(
self
.
_process_doc
,
self
.
dataset
[
"dev"
]))
self
.
_fewshot_docs
=
list
(
map
(
self
.
_process_doc
,
self
.
dataset
[
"dev"
]))
return
rnd
.
sample
(
list
(
self
.
_fewshot_docs
),
k
)
return
self
.
_fewshot_docs
[:
k
]
#
rnd.sample(list(self._fewshot_docs), k)
def
doc_to_text
(
self
,
doc
):
def
doc_to_text
(
self
,
doc
):
return
doc
[
"query"
]
return
doc
[
"query"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment