Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
458342e2
Commit
458342e2
authored
Aug 05, 2024
by
lintangsutawika
Browse files
format
parent
b8122d98
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
44 deletions
+3
-44
lm_eval/tasks/mmlu_pro/_default_template_yaml
lm_eval/tasks/mmlu_pro/_default_template_yaml
+3
-1
lm_eval/tasks/mmlu_pro/utils.py
lm_eval/tasks/mmlu_pro/utils.py
+0
-43
No files found.
lm_eval/tasks/mmlu_pro/_default_template_yaml
View file @
458342e2
...
@@ -11,7 +11,9 @@ doc_to_target: answer
...
@@ -11,7 +11,9 @@ doc_to_target: answer
filter_list:
filter_list:
- name: "custom-extract"
- name: "custom-extract"
filter:
filter:
- function: !function utils.CustomRegexFilter
- function: "regex"
# Single-quoted YAML scalar: backslashes are kept literally, which is what the regex needs.
# A Python-style r"..." prefix is not YAML syntax; it would be read as a plain scalar and the
# leading r and both double quotes would become part of the pattern, so it could never match.
regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
# regex_pattern: r".*[aA]nswer:\s*([A-J])",
- function: "take_first"
- function: "take_first"
generation_kwargs:
generation_kwargs:
until:
until:
...
...
lm_eval/tasks/mmlu_pro/utils.py
View file @
458342e2
import
re
from
functools
import
partial
from
functools
import
partial
from
lm_eval.api.filter
import
Filter
choices
=
[
choices
=
[
"A"
,
"A"
,
...
@@ -64,43 +61,3 @@ process_other = partial(process_docs, subject="other")
...
@@ -64,43 +61,3 @@ process_other = partial(process_docs, subject="other")
# Subject-specific variants of process_docs: functools.partial pre-binds the
# `subject` keyword argument, so each name below can be called without it.
# NOTE(review): every assignment appears twice in this view, which looks like
# a side-by-side diff rendering rather than real duplication -- confirm
# against the actual file.
process_philosophy
=
partial
(
process_docs
,
subject
=
"philosophy"
)
process_philosophy
=
partial
(
process_docs
,
subject
=
"philosophy"
)
process_physics
=
partial
(
process_docs
,
subject
=
"physics"
)
process_physics
=
partial
(
process_docs
,
subject
=
"physics"
)
process_psychology
=
partial
(
process_docs
,
subject
=
"psychology"
)
process_psychology
=
partial
(
process_docs
,
subject
=
"psychology"
)
class CustomRegexFilter(Filter):
    """Extract a multiple-choice answer letter from model responses.

    Each response is searched with a sequence of regular expressions, in
    order. The first capture group of the first pattern that matches is the
    extracted answer; a response that matches no pattern yields ``fallback``.
    """

    # Kept as an immutable tuple so the defaults cannot be mutated through one
    # instance and leak into later ones (the hazard of using a list literal
    # as a default argument value).
    DEFAULT_PATTERNS = (
        r"answer is \(?([ABCDEFGHIJ])\)?",
        r".*[aA]nswer:\s*([A-J])",
    )

    def __init__(
        self,
        regex_pattern: "list | str | None" = None,
        group_select=0,
        fallback: str = "[invalid]",
    ) -> None:
        """Compile the extraction patterns.

        Args:
            regex_pattern: Patterns to try, in priority order. A single
                string is accepted and treated as a one-element list.
                ``None`` selects ``DEFAULT_PATTERNS``.
            group_select: Stored on the instance for interface
                compatibility; ``apply`` does not consult it and always
                takes capture group 1 of the first matching pattern.
            fallback: Value emitted for a response that matches none of the
                patterns.
        """
        if regex_pattern is None:
            patterns = list(self.DEFAULT_PATTERNS)
        elif isinstance(regex_pattern, str):
            # Iterating a bare string would compile one regex per character.
            patterns = [regex_pattern]
        else:
            patterns = list(regex_pattern)

        self.regex_pattern = patterns
        self.regex = [re.compile(pattern) for pattern in patterns]
        self.group_select = group_select
        self.fallback = fallback

    def _extract(self, resp):
        """Return group 1 of the first pattern matching ``resp``, else the fallback."""
        for pattern in self.regex:
            match = pattern.search(resp)
            if match:
                return match.group(1)
        return self.fallback

    def apply(self, resps, docs):
        """Filter ``resps`` while keeping the output aligned with the input.

        ``resps`` is expected to be a list in which each element is the list
        of model responses for one input/target pair; each inner list is
        processed independently and stays a list. A bare string element is
        also accepted and produces a single extracted value.

        Every response produces exactly one output entry (the extracted
        answer or ``fallback``), so position ``i`` of the result always
        corresponds to position ``i`` of ``resps``. ``docs`` is not used.
        """
        filtered_resps = []
        for resp in resps:
            if isinstance(resp, str):
                filtered_resps.append(self._extract(resp))
            else:
                filtered_resps.append([self._extract(single) for single in resp])
        return filtered_resps
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment