Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
a33971ff
Commit
a33971ff
authored
Jul 24, 2024
by
root
Browse files
mmlu-pro: minor fixes
parent
25bb0c3b
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
115 additions
and
3 deletions
+115
-3
lm_eval/tasks/mmlu_pro/flan_cot_fewshot/_mmlu_pro_flan_cot_fewshot_template_yaml
...flan_cot_fewshot/_mmlu_pro_flan_cot_fewshot_template_yaml
+1
-1
lm_eval/tasks/mmlu_pro/flan_cot_zeroshot/_mmlu_pro_flan_cot_zeroshot_template_yaml
...an_cot_zeroshot/_mmlu_pro_flan_cot_zeroshot_template_yaml
+2
-2
lm_eval/tasks/mmlu_pro/flan_cot_zeroshot/utils.py
lm_eval/tasks/mmlu_pro/flan_cot_zeroshot/utils.py
+112
-0
No files found.
lm_eval/tasks/mmlu_pro/flan_cot_fewshot/_mmlu_pro_flan_cot_fewshot_template_yaml
View file @
a33971ff
...
@@ -5,7 +5,7 @@ fewshot_split: dev
...
@@ -5,7 +5,7 @@ fewshot_split: dev
fewshot_config:
fewshot_config:
sampler: first_n
sampler: first_n
output_type: generate_until
output_type: generate_until
doc_to_text: "{% set alphabet = 'ABCDEFGHIJ' %}{{ question.strip() }}\n{% for index in range(options|length) %}({{ alphabet[index] }}) {{ options[index] }} {% endfor %}\nA: Let's think step by step."
doc_to_text: "{% set alphabet = 'ABCDEFGHIJ' %}
Q:
{{ question.strip() }}\n{% for index in range(options|length) %}({{ alphabet[index] }}) {{ options[index] }} {% endfor %}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)', '(E)', '(F)', '(G)', '(H)', '(I)', '(J)'][answer_index]}}"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)', '(E)', '(F)', '(G)', '(H)', '(I)', '(J)'][answer_index]}}"
filter_list:
filter_list:
- name: "get-answer"
- name: "get-answer"
...
...
lm_eval/tasks/mmlu_pro/flan_cot_zeroshot/_mmlu_pro_flan_cot_zeroshot_template_yaml
View file @
a33971ff
...
@@ -2,8 +2,8 @@ dataset_path: sjyuxyz/MMLU-Pro-with-subset
...
@@ -2,8 +2,8 @@ dataset_path: sjyuxyz/MMLU-Pro-with-subset
validation_split: validation
validation_split: validation
fewshot_split: dev
fewshot_split: dev
output_type: generate_until
output_type: generate_until
doc_to_text: "{% set alphabet = 'ABCDEFGHIJ' %}{{ question.strip() }}\n{% for index in range(options|length) %}({{ alphabet[index] }}) {{ options[index] }} {% endfor %}\nA: Let's think step by step."
doc_to_text: "{% set alphabet = 'ABCDEFGHIJ' %}
Q:
{{ question.strip() }}\n{% for index in range(options|length) %}({{ alphabet[index] }}) {{ options[index] }} {% endfor %}\nA: Let's think step by step."
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)', '(E)', '(F)', '(G)', '(H)', '(I)', '(J)'][answer]}}"
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)', '(E)', '(F)', '(G)', '(H)', '(I)', '(J)'][answer
_index
]}}"
filter_list:
filter_list:
- name: "strict-match"
- name: "strict-match"
filter:
filter:
...
...
lm_eval/tasks/mmlu_pro/flan_cot_zeroshot/utils.py
0 → 100644
View file @
a33971ff
import
re
import
sys
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
class MultiChoiceRegexFilter(RegexFilter):
    """Extract a multiple-choice answer such as ``(A)`` from model responses.

    Each response is matched in three stages, stopping at the first stage
    that yields a non-empty result:

    1. the primary ``regex_pattern`` (compiled by ``RegexFilter`` and
       exposed as ``self.regex``);
    2. the literal text of any option in ``doc["options"]`` (after the
       configured normalisation), mapped back to its letter ``(A)``, ``(B)``…;
    3. a bare option letter following a colon, e.g. ``answer: B``.

    If no stage matches, ``self.fallback`` is emitted for that response.
    """

    # Translation table that deletes every Unicode punctuation code point.
    # Building it walks the whole Unicode range, so it is created lazily on
    # first use and shared by all instances instead of rebuilt per apply().
    _punct_tbl = None

    def __init__(
        self,
        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
        group_select=0,
        fallback: str = "[invalid]",
        ignore_case=False,
        ignore_punctuation=False,
        regexes_to_ignore=None,
    ) -> None:
        r"""
        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
                        - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response.
                        - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices.
        group_select: Selects the (group_select)th match from the findall result.
        fallback: Value emitted when none of the matching stages succeeds.
        ignore_case: Ignores the case during step 1 matching
        ignore_punctuation: Remove the punctuation during step 1 matching
        regexes_to_ignore: Remove these regexes during step 1 matching
        """
        super().__init__(regex_pattern, group_select, fallback)
        self.ignore_case = ignore_case
        self.ignore_punctuation = ignore_punctuation
        self.regexes_to_ignore = regexes_to_ignore

    @classmethod
    def _punctuation_table(cls):
        """Return the cached ``str.translate`` table that strips punctuation."""
        if MultiChoiceRegexFilter._punct_tbl is None:
            # https://stackoverflow.com/a/266162
            MultiChoiceRegexFilter._punct_tbl = dict.fromkeys(
                i
                for i in range(sys.maxunicode + 1)
                if unicodedata.category(chr(i)).startswith("P")
            )
        return MultiChoiceRegexFilter._punct_tbl

    def _filter_ignores(self, st):
        """Normalise ``st`` according to the configured ignore options.

        Order matters and mirrors the option text normalisation: first the
        ``regexes_to_ignore`` are removed, then case is folded, then
        punctuation is stripped.
        """
        if self.regexes_to_ignore is not None:
            for s in self.regexes_to_ignore:
                st = re.sub(s, "", st)

        if self.ignore_case:
            st = st.lower()

        if self.ignore_punctuation:
            st = st.translate(self._punctuation_table())

        return st

    def _find_match(self, regex, resp, convert_dict=None):
        """Return the selected match of ``regex`` in ``resp``, or a falsy value.

        regex: compiled pattern to search with (``findall`` semantics).
        resp: response text to search.
        convert_dict: optional mapping applied to a non-empty match, e.g.
            option text -> ``"(A)"``.
        """
        found = regex.findall(resp)
        if not found:
            return None

        try:
            match = found[self.group_select]
        except IndexError:
            # Fewer matches than group_select expects: treat as "no match"
            # so the caller can try the next stage / fallback.
            return None

        if isinstance(match, tuple):
            # Several capture groups: keep the first non-empty one. All
            # groups may be empty, in which case there is no usable match.
            match = next((m for m in match if m), "")

        match = match.strip()
        if match and convert_dict and match in convert_dict:
            match = convert_dict[match]
        return match

    def apply(self, resps, docs):
        """Filter every response set, returning one answer per response.

        resps: a list in which each element is a list of model responses for
            one particular input/target pair.
        docs: the documents aligned with ``resps``; each must provide its
            answer choices under the ``"options"`` key.

        Each response set is processed independently and the nested list
        structure is preserved in the returned value.
        """
        filtered_resps = []

        for r, doc in zip(resps, docs):
            fallback_regexes = []
            choice_to_alpha = {}
            without_paren_fallback_regexes = []
            without_paren_to_target = {}

            next_alpha = "A"
            for c in doc["options"]:
                m = self._filter_ignores(c.strip())
                target = f"({next_alpha})"

                # Stage 2: literal option text -> "(X)".
                fallback_regexes.append(re.escape(m))
                choice_to_alpha[m] = target

                # Stage 3: bare letter after a colon -> "(X)".
                without_paren_fallback_regexes.append(next_alpha)
                without_paren_to_target[next_alpha] = target

                next_alpha = chr(ord(next_alpha) + 1)

            fallback_regex = re.compile("|".join(fallback_regexes))
            letters = "|".join(without_paren_fallback_regexes)
            without_paren_fallback_regex = re.compile(rf":[\s]*({letters})")

            filtered = []
            for resp in r:
                match = self._find_match(self.regex, resp)
                if not match:
                    match = self._find_match(
                        fallback_regex, self._filter_ignores(resp), choice_to_alpha
                    )
                    if not match:
                        match = self._find_match(
                            without_paren_fallback_regex, resp, without_paren_to_target
                        )
                if not match:
                    match = self.fallback
                filtered.append(match)
            filtered_resps.append(filtered)

        return filtered_resps
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment