Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
1f6a6ebc
Commit
1f6a6ebc
authored
Jun 27, 2024
by
lintangsutawika
Browse files
moved files out, and removed unused versions
parent
5be2bb10
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
201 additions
and
1 deletion
+201
-1
lm_eval/api/samplers.py
lm_eval/api/samplers.py
+5
-1
lm_eval/tasks/mmlu_pro/_default_template_yaml
lm_eval/tasks/mmlu_pro/_default_template_yaml
+30
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
+5
-0
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
+5
-0
lm_eval/tasks/mmlu_pro/utils.py
lm_eval/tasks/mmlu_pro/utils.py
+96
-0
No files found.
lm_eval/api/samplers.py
View file @
1f6a6ebc
...
@@ -15,7 +15,11 @@ class ContextSampler:
...
@@ -15,7 +15,11 @@ class ContextSampler:
self
.
target_delimiter
=
self
.
config
.
target_delimiter
self
.
target_delimiter
=
self
.
config
.
target_delimiter
self
.
fewshot_delimiter
=
self
.
config
.
fewshot_delimiter
self
.
fewshot_delimiter
=
self
.
config
.
fewshot_delimiter
self
.
doc_to_text
=
self
.
task
.
doc_to_text
if
self
.
config
.
fewshot_config
is
not
None
and
self
.
config
.
fewshot_config
.
get
(
"doc_to_text"
,
None
)
is
not
None
:
self
.
doc_to_text
=
self
.
config
.
fewshot_config
.
get
(
"doc_to_text"
,
None
)
else
:
self
.
doc_to_text
=
self
.
task
.
doc_to_text
self
.
doc_to_target
=
self
.
task
.
doc_to_target
self
.
doc_to_target
=
self
.
task
.
doc_to_target
self
.
doc_to_choice
=
self
.
task
.
doc_to_choice
self
.
doc_to_choice
=
self
.
task
.
doc_to_choice
...
...
lm_eval/tasks/mmlu_pro/_default_template_yaml
0 → 100644
View file @
1f6a6ebc
dataset_path: TIGER-Lab/MMLU-Pro
test_split: test
fewshot_split: validation
fewshot_config:
sampler: first_n
doc_to_text: !function utils.fewshot_to_text
output_type: generate_until
doc_to_text: !function utils.doc_to_text
doc_to_target: answer
# filter_list:
# - name: "custom-extract"
# filter:
# - function: !function utils.CustomRegexFilter
# - function: "take_first"
generation_kwargs:
until:
- "</s>"
- "Q:"
- "<|im_end|>"
do_sample: false
temperature: 0.0
num_fewshot: 5
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
metadata:
version: 0.0
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
0 → 100644
View file @
1f6a6ebc
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
biology.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_biology"
task_alias
:
"
biology"
process_docs
:
!function
utils.process_biology
\ No newline at end of file
lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
business"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
business.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_business"
task_alias
:
"
business"
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
0 → 100644
View file @
1f6a6ebc
# Chemistry subtask. The identifiers below must name chemistry (not math) so
# this task does not collide with the task declared in mmlu_pro_math.yaml.
dataset_name: "chemistry"
description: "The following are multiple choice questions (with answers) about chemistry. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
include: "_default_template_yaml"
task: "mmlu_pro_chemistry"
task_alias: "chemistry"
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
computer_science"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
computer
science.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_computer_science"
task_alias
:
"
computer_science"
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
economics"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
economics.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_economics"
task_alias
:
"
economics"
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
engineering"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
engineering.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_engineering"
task_alias
:
"
engineering"
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
health"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
health.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_health"
task_alias
:
"
health"
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
history"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
history.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_history"
task_alias
:
"
history"
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
law"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
law.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_law"
task_alias
:
"
law"
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
math"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
math.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_math"
task_alias
:
"
math"
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
other"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
other.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_other"
task_alias
:
"
other"
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
philosophy"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
philosophy.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_philosophy"
task_alias
:
"
philosophy"
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
physics"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
physics.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_physics"
task_alias
:
"
physics"
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
0 → 100644
View file @
1f6a6ebc
dataset_name
:
"
psychology"
description
:
"
The
following
are
multiple
choice
questions
(with
answers)
about
psychology.
Think
step
by
step
and
then
finish
your
answer
with
\"
the
answer
is
(X)
\"
where
X
is
the
correct
letter
choice."
include
:
"
_default_template_yaml"
task
:
"
mmlu_pro_psychology"
task_alias
:
"
psychology"
lm_eval/tasks/mmlu_pro/utils.py
0 → 100644
View file @
1f6a6ebc
import
re
from
functools
import
partial
from
lm_eval.api.filter
import
Filter
# Letter labels assigned to answer options by their position in the list.
choices = [
    "A", "B", "C", "D", "E", "F", "G", "H",
    "I", "J", "K", "L", "M", "N", "O", "P",
]

# Cue that opens the reasoning section of every rendered example.
_ANSWER_CUE = "Answer: Let's think step by step."


def format_cot_example(example, including_answer=True):
    """Render one question as a chain-of-thought prompt segment.

    Args:
        example: Mapping with a ``"question"`` string and an ``"options"``
            sequence; ``"cot_content"`` is also read when
            ``including_answer`` is true.
        including_answer: When true, append the example's reasoning text
            (with its leading ``"A: Let's think step by step."`` rewritten
            to the ``"Answer: ..."`` cue) followed by a blank line. When
            false, end the prompt with the bare cue.

    Returns:
        The formatted prompt text.

    Raises:
        IndexError: If there are more options than labels in ``choices``.
    """
    question = example["question"]
    options = example["options"]

    # Collect the pieces and join once instead of growing a string with +=.
    parts = ["Question:\n", question, "\n", "Options:\n"]
    parts.extend(f"{choices[i]}. {opt}\n" for i, opt in enumerate(options))

    if including_answer:
        cot_content = example["cot_content"].replace(
            "A: Let's think step by step.", _ANSWER_CUE
        )
        parts.extend((cot_content, "\n\n"))
    else:
        parts.append(_ANSWER_CUE)
    return "".join(parts)
# Prompt builder for the question being evaluated: the text ends at the
# answer cue. Referenced from the task template as `!function utils.doc_to_text`.
doc_to_text = partial(format_cot_example, including_answer=False)
# Prompt builder for few-shot demonstrations: the example's reasoning text is
# included. Referenced from `fewshot_config` as `!function utils.fewshot_to_text`.
fewshot_to_text = partial(format_cot_example, including_answer=True)
def process_docs(dataset, subject):
    """Restrict ``dataset`` to the rows belonging to one subject.

    Args:
        dataset: Object exposing ``filter(predicate)``; each row passed to
            the predicate must support ``row["category"]``.
        subject: Category value a row must equal to be kept.

    Returns:
        Whatever ``dataset.filter`` returns for the subject predicate.
    """

    def _matches_subject(row):
        return row["category"] == subject

    return dataset.filter(_matches_subject)
# Per-subject dataset filters built from `process_docs`. Task YAML files can
# reference these by name (e.g. `process_docs: !function utils.process_biology`),
# so the module-level names must stay as they are.
process_biology = partial(process_docs, subject="biology")
process_business = partial(process_docs, subject="business")
process_chemistry = partial(process_docs, subject="chemistry")
process_computer_science = partial(process_docs, subject="computer_science")
process_economics = partial(process_docs, subject="economics")
process_engineering = partial(process_docs, subject="engineering")
process_health = partial(process_docs, subject="health")
process_history = partial(process_docs, subject="history")
process_law = partial(process_docs, subject="law")
process_math = partial(process_docs, subject="math")
process_other = partial(process_docs, subject="other")
process_philosophy = partial(process_docs, subject="philosophy")
process_physics = partial(process_docs, subject="physics")
process_psychology = partial(process_docs, subject="psychology")
# def generate_cot_prompt(val_df, curr, k):
# prompt = ""
# with open(f"cot_prompt_lib/initial_prompt.txt", "r") as fi:
# for line in fi.readlines():
# prompt += line
# subject = curr["category"]
# val_df = select_by_category(val_df, subject)
# val_df = val_df[: k]
# prompt = prompt.replace("{$}", subject) + "\n"
# for example in val_df:
# prompt += format_cot_example(example, including_answer=True)
# prompt += format_cot_example(curr, including_answer=False)
# return prompt
class CustomRegexFilter(Filter):
    """Extract an answer letter from generated text using ordered regexes.

    Every response is searched with each compiled pattern in turn; the first
    pattern that matches contributes its capture group 1. A response that
    matches no pattern yields ``fallback``, so the output always lines up
    one-to-one with the input.
    """

    # Tried in order: the explicit "answer is (X)" phrasing first, then the
    # looser "Answer: X" form. A tuple, so the default cannot be mutated.
    _DEFAULT_REGEX_PATTERNS = (
        r"answer is \(?([ABCDEFGHIJ])\)?",
        r".*[aA]nswer:\s*([A-J])",
    )

    def __init__(
        self,
        regex_pattern: "list | None" = None,
        group_select=0,
        fallback: str = "[invalid]",
    ) -> None:
        """Compile the extraction patterns.

        Args:
            regex_pattern: Patterns to try in order, each with at least one
                capture group. A single string is treated as a one-element
                list. ``None`` selects the built-in answer patterns.
            group_select: Stored on the instance; not consulted by ``apply``.
            fallback: Value returned for a response that matches no pattern.
        """
        if regex_pattern is None:
            # Fresh list per instance instead of a shared mutable default.
            regex_pattern = list(self._DEFAULT_REGEX_PATTERNS)
        elif isinstance(regex_pattern, str):
            # Iterating a bare string would compile it character by character.
            regex_pattern = [regex_pattern]
        self.regex_pattern = regex_pattern
        self.regex = [re.compile(pattern) for pattern in regex_pattern]
        self.group_select = group_select
        self.fallback = fallback

    def _extract(self, resp):
        """Return group 1 of the first matching pattern, else ``fallback``."""
        for pattern in self.regex:
            match = pattern.search(resp)
            if match:
                return match.group(1)
        return self.fallback

    def apply(self, resps, docs):
        """Filter model responses down to the extracted answers.

        Args:
            resps: Either a flat list of response strings, or a list whose
                elements are lists of response strings for one input/target
                pair. Nested lists are processed independently and stay
                nested.
            docs: Unused; accepted for interface compatibility.

        Returns:
            A list shaped like ``resps`` with every response replaced by its
            extracted answer or ``fallback``. Empty input gives ``[]``.
        """
        filtered_resps = []
        for resp in resps:
            if isinstance(resp, str):
                filtered_resps.append(self._extract(resp))
            else:
                filtered_resps.append([self._extract(item) for item in resp])
        return filtered_resps
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment