Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
01b129bb
Commit
01b129bb
authored
Aug 05, 2024
by
lintangsutawika
Browse files
pre-commit
parent
89de5103
Changes
15
Show whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
43 additions
and
33 deletions
+43
-33
lm_eval/tasks/mmlu_pro/README.md
lm_eval/tasks/mmlu_pro/README.md
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
+1
-1
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
+1
-1
lm_eval/tasks/mmlu_pro/utils.py
lm_eval/tasks/mmlu_pro/utils.py
+29
-19
No files found.
lm_eval/tasks/mmlu_pro/README.md
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
View file @
01b129bb
lm_eval/tasks/mmlu_pro/utils.py
View file @
01b129bb
...
@@ -3,7 +3,26 @@ from functools import partial
...
@@ -3,7 +3,26 @@ from functools import partial
from
lm_eval.api.filter
import
Filter
from
lm_eval.api.filter
import
Filter
choices
=
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
,
"F"
,
"G"
,
"H"
,
"I"
,
"J"
,
"K"
,
"L"
,
"M"
,
"N"
,
"O"
,
"P"
]
choices
=
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
,
"F"
,
"G"
,
"H"
,
"I"
,
"J"
,
"K"
,
"L"
,
"M"
,
"N"
,
"O"
,
"P"
,
]
def
format_cot_example
(
example
,
including_answer
=
True
):
def
format_cot_example
(
example
,
including_answer
=
True
):
prompt
=
"Question:
\n
"
prompt
=
"Question:
\n
"
...
@@ -14,8 +33,9 @@ def format_cot_example(example, including_answer=True):
...
@@ -14,8 +33,9 @@ def format_cot_example(example, including_answer=True):
for
i
,
opt
in
enumerate
(
options
):
for
i
,
opt
in
enumerate
(
options
):
prompt
+=
"{}. {}
\n
"
.
format
(
choices
[
i
],
opt
)
prompt
+=
"{}. {}
\n
"
.
format
(
choices
[
i
],
opt
)
if
including_answer
:
if
including_answer
:
cot_content
=
example
[
"cot_content"
].
replace
(
"A: Let's think step by step."
,
cot_content
=
example
[
"cot_content"
].
replace
(
"Answer: Let's think step by step."
)
"A: Let's think step by step."
,
"Answer: Let's think step by step."
)
prompt
+=
cot_content
+
"
\n\n
"
prompt
+=
cot_content
+
"
\n\n
"
else
:
else
:
prompt
+=
"Answer: Let's think step by step."
prompt
+=
"Answer: Let's think step by step."
...
@@ -29,6 +49,7 @@ fewshot_to_text = partial(format_cot_example, including_answer=True)
...
@@ -29,6 +49,7 @@ fewshot_to_text = partial(format_cot_example, including_answer=True)
def
process_docs
(
dataset
,
subject
):
def
process_docs
(
dataset
,
subject
):
return
dataset
.
filter
(
lambda
x
:
x
[
"category"
]
==
subject
)
return
dataset
.
filter
(
lambda
x
:
x
[
"category"
]
==
subject
)
process_biology
=
partial
(
process_docs
,
subject
=
"biology"
)
process_biology
=
partial
(
process_docs
,
subject
=
"biology"
)
process_business
=
partial
(
process_docs
,
subject
=
"business"
)
process_business
=
partial
(
process_docs
,
subject
=
"business"
)
process_chemistry
=
partial
(
process_docs
,
subject
=
"chemistry"
)
process_chemistry
=
partial
(
process_docs
,
subject
=
"chemistry"
)
...
@@ -45,26 +66,15 @@ process_physics = partial(process_docs, subject="physics")
...
@@ -45,26 +66,15 @@ process_physics = partial(process_docs, subject="physics")
process_psychology
=
partial
(
process_docs
,
subject
=
"psychology"
)
process_psychology
=
partial
(
process_docs
,
subject
=
"psychology"
)
# def generate_cot_prompt(val_df, curr, k):
# prompt = ""
# with open(f"cot_prompt_lib/initial_prompt.txt", "r") as fi:
# for line in fi.readlines():
# prompt += line
# subject = curr["category"]
# val_df = select_by_category(val_df, subject)
# val_df = val_df[: k]
# prompt = prompt.replace("{$}", subject) + "\n"
# for example in val_df:
# prompt += format_cot_example(example, including_answer=True)
# prompt += format_cot_example(curr, including_answer=False)
# return prompt
class
CustomRegexFilter
(
Filter
):
class
CustomRegexFilter
(
Filter
):
""" """
""" """
def
__init__
(
def
__init__
(
self
,
self
,
regex_pattern
:
list
=
[
r
"answer is \(?([ABCDEFGHIJ])\)?"
,
r
".*[aA]nswer:\s*([A-J])"
],
regex_pattern
:
list
=
[
r
"answer is \(?([ABCDEFGHIJ])\)?"
,
r
".*[aA]nswer:\s*([A-J])"
,
],
group_select
=
0
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
fallback
:
str
=
"[invalid]"
,
)
->
None
:
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment