Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
d27c0c08
Unverified
Commit
d27c0c08
authored
Feb 26, 2024
by
LSinev
Committed by
GitHub
Feb 26, 2024
Browse files
Apply code autoformatting with Ruff to tasks/*.py an *__init__.py (#1469)
parent
f78e2da4
Changes
48
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
124 additions
and
54 deletions
+124
-54
lm_eval/tasks/hellaswag/utils.py
lm_eval/tasks/hellaswag/utils.py
+2
-1
lm_eval/tasks/ifeval/instructions.py
lm_eval/tasks/ifeval/instructions.py
+2
-0
lm_eval/tasks/ifeval/instructions_registry.py
lm_eval/tasks/ifeval/instructions_registry.py
+1
-0
lm_eval/tasks/kobest/utils.py
lm_eval/tasks/kobest/utils.py
+13
-2
lm_eval/tasks/medmcqa/utils_medmcqa.py
lm_eval/tasks/medmcqa/utils_medmcqa.py
+6
-1
lm_eval/tasks/medqa/preprocess_medqa.py
lm_eval/tasks/medqa/preprocess_medqa.py
+6
-1
lm_eval/tasks/mgsm/utils.py
lm_eval/tasks/mgsm/utils.py
+5
-3
lm_eval/tasks/minerva_math/utils.py
lm_eval/tasks/minerva_math/utils.py
+6
-3
lm_eval/tasks/mmlu/_generate_configs.py
lm_eval/tasks/mmlu/_generate_configs.py
+3
-3
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
+25
-12
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
+25
-12
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
...model_written_evals/advanced_ai_risk/_generate_configs.py
+1
-2
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
...al/tasks/model_written_evals/persona/_generate_configs.py
+1
-2
lm_eval/tasks/okapi/arc_multilingual/utils.py
lm_eval/tasks/okapi/arc_multilingual/utils.py
+9
-2
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
+2
-1
lm_eval/tasks/okapi/mmlu_multilingual/_generate_configs.py
lm_eval/tasks/okapi/mmlu_multilingual/_generate_configs.py
+8
-4
lm_eval/tasks/okapi/truthfulqa_multilingual/utils.py
lm_eval/tasks/okapi/truthfulqa_multilingual/utils.py
+4
-1
lm_eval/tasks/qasper/metrics.py
lm_eval/tasks/qasper/metrics.py
+0
-1
lm_eval/tasks/qasper/utils.py
lm_eval/tasks/qasper/utils.py
+2
-1
lm_eval/tasks/realtoxicityprompts/metric.py
lm_eval/tasks/realtoxicityprompts/metric.py
+3
-2
No files found.
lm_eval/tasks/hellaswag/utils.py
View file @
d27c0c08
import
datasets
import
re
import
re
import
datasets
def
preprocess
(
text
):
def
preprocess
(
text
):
text
=
text
.
strip
()
text
=
text
.
strip
()
...
...
lm_eval/tasks/ifeval/instructions.py
View file @
d27c0c08
...
@@ -22,8 +22,10 @@ import string
...
@@ -22,8 +22,10 @@ import string
from
typing
import
Dict
,
Optional
,
Sequence
,
Union
from
typing
import
Dict
,
Optional
,
Sequence
,
Union
import
langdetect
import
langdetect
from
lm_eval.tasks.ifeval
import
instructions_util
from
lm_eval.tasks.ifeval
import
instructions_util
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
_InstructionArgsDtype
=
Optional
[
Dict
[
str
,
Union
[
int
,
str
,
Sequence
[
str
]]]]
_InstructionArgsDtype
=
Optional
[
Dict
[
str
,
Union
[
int
,
str
,
Sequence
[
str
]]]]
...
...
lm_eval/tasks/ifeval/instructions_registry.py
View file @
d27c0c08
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
"""Registry of all instructions."""
"""Registry of all instructions."""
from
lm_eval.tasks.ifeval
import
instructions
from
lm_eval.tasks.ifeval
import
instructions
_KEYWORD
=
"keywords:"
_KEYWORD
=
"keywords:"
_LANGUAGE
=
"language:"
_LANGUAGE
=
"language:"
...
...
lm_eval/tasks/kobest/utils.py
View file @
d27c0c08
...
@@ -6,32 +6,43 @@ def copa_doc_to_text(doc: dict) -> str:
...
@@ -6,32 +6,43 @@ def copa_doc_to_text(doc: dict) -> str:
connector
=
{
"원인"
:
" 왜냐하면"
,
"결과"
:
" 그래서"
}[
doc
[
"question"
].
strip
()]
connector
=
{
"원인"
:
" 왜냐하면"
,
"결과"
:
" 그래서"
}[
doc
[
"question"
].
strip
()]
return
f
"""
{
doc
[
"premise"
]
}
{
connector
}
"""
return
f
"""
{
doc
[
"premise"
]
}
{
connector
}
"""
def
copa_doc_to_target
(
doc
:
dict
)
->
str
:
def
copa_doc_to_target
(
doc
:
dict
)
->
str
:
correct_choice
=
doc
[
"alternative_1"
]
if
doc
[
"label"
]
==
0
else
doc
[
"alternative_2"
]
correct_choice
=
doc
[
"alternative_1"
]
if
doc
[
"label"
]
==
0
else
doc
[
"alternative_2"
]
return
f
"""
{
correct_choice
}
"""
return
f
"""
{
correct_choice
}
"""
def
copa_doc_to_choice
(
doc
:
dict
)
->
list
:
def
copa_doc_to_choice
(
doc
:
dict
)
->
list
:
return
[
f
"""
{
doc
[
"alternative_1"
]
}
"""
,
f
"""
{
doc
[
"alternative_2"
]
}
"""
]
return
[
f
"""
{
doc
[
"alternative_1"
]
}
"""
,
f
"""
{
doc
[
"alternative_2"
]
}
"""
]
def
sentineg_doc_to_text
(
doc
:
dict
):
def
sentineg_doc_to_text
(
doc
:
dict
):
return
f
"""문장:
{
doc
[
"sentence"
]
}
긍부정:"""
return
f
"""문장:
{
doc
[
"sentence"
]
}
긍부정:"""
def
wic_doc_to_text
(
doc
:
dict
)
->
str
:
def
wic_doc_to_text
(
doc
:
dict
)
->
str
:
return
f
"""문장1:
{
doc
[
"context_1"
]
}
문장2:
{
doc
[
"context_2"
]
}
두 문장에서
{
doc
[
"word"
]
}
가 같은 뜻으로 쓰였나?"""
return
f
"""문장1:
{
doc
[
"context_1"
]
}
문장2:
{
doc
[
"context_2"
]
}
두 문장에서
{
doc
[
"word"
]
}
가 같은 뜻으로 쓰였나?"""
def
hellaswag_process_doc
(
doc
:
Dataset
)
->
Dataset
:
def
hellaswag_process_doc
(
doc
:
Dataset
)
->
Dataset
:
def
preprocessor
(
dataset
):
def
preprocessor
(
dataset
):
return
{
return
{
"query"
:
f
"""문장:
{
dataset
[
"context"
]
}
"""
,
"query"
:
f
"""문장:
{
dataset
[
"context"
]
}
"""
,
"choices"
:
[
dataset
[
"ending_1"
],
dataset
[
"ending_2"
],
dataset
[
"ending_3"
],
dataset
[
"ending_4"
]],
"choices"
:
[
dataset
[
"ending_1"
],
dataset
[
"ending_2"
],
dataset
[
"ending_3"
],
dataset
[
"ending_4"
],
],
"gold"
:
int
(
dataset
[
"label"
]),
"gold"
:
int
(
dataset
[
"label"
]),
}
}
return
doc
.
map
(
preprocessor
)
return
doc
.
map
(
preprocessor
)
def
macro_f1_score
(
items
):
def
macro_f1_score
(
items
):
unzipped_list
=
list
(
zip
(
*
items
))
unzipped_list
=
list
(
zip
(
*
items
))
golds
=
unzipped_list
[
0
]
golds
=
unzipped_list
[
0
]
preds
=
unzipped_list
[
1
]
preds
=
unzipped_list
[
1
]
fscore
=
f1_score
(
golds
,
preds
,
average
=
'
macro
'
)
fscore
=
f1_score
(
golds
,
preds
,
average
=
"
macro
"
)
return
fscore
return
fscore
lm_eval/tasks/medmcqa/utils_medmcqa.py
View file @
d27c0c08
...
@@ -10,7 +10,12 @@ def doc_to_text(doc) -> str:
...
@@ -10,7 +10,12 @@ def doc_to_text(doc) -> str:
Answer:
Answer:
"""
"""
choices
=
[
doc
[
"opa"
],
doc
[
"opb"
],
doc
[
"opc"
],
doc
[
"opd"
]]
choices
=
[
doc
[
"opa"
],
doc
[
"opb"
],
doc
[
"opc"
],
doc
[
"opd"
]]
option_choices
=
{
'A'
:
choices
[
0
],
'B'
:
choices
[
1
],
'C'
:
choices
[
2
],
'D'
:
choices
[
3
]}
option_choices
=
{
"A"
:
choices
[
0
],
"B"
:
choices
[
1
],
"C"
:
choices
[
2
],
"D"
:
choices
[
3
],
}
prompt
=
"Question: "
+
doc
[
"question"
]
+
"
\n
Choices:
\n
"
prompt
=
"Question: "
+
doc
[
"question"
]
+
"
\n
Choices:
\n
"
for
choice
,
option
in
option_choices
.
items
():
for
choice
,
option
in
option_choices
.
items
():
...
...
lm_eval/tasks/medqa/preprocess_medqa.py
View file @
d27c0c08
def
doc_to_text
(
doc
)
->
str
:
def
doc_to_text
(
doc
)
->
str
:
option_choices
=
{
'A'
:
doc
[
"ending0"
],
'B'
:
doc
[
"ending1"
],
'C'
:
doc
[
"ending2"
],
'D'
:
doc
[
"ending3"
]}
option_choices
=
{
"A"
:
doc
[
"ending0"
],
"B"
:
doc
[
"ending1"
],
"C"
:
doc
[
"ending2"
],
"D"
:
doc
[
"ending3"
],
}
answers
=
""
.
join
((
f
"
{
k
}
.
{
v
}
\n
"
)
for
k
,
v
in
option_choices
.
items
())
answers
=
""
.
join
((
f
"
{
k
}
.
{
v
}
\n
"
)
for
k
,
v
in
option_choices
.
items
())
return
f
"Question:
{
doc
[
'sent1'
]
}
\n
{
answers
}
Answer:"
return
f
"Question:
{
doc
[
'sent1'
]
}
\n
{
answers
}
Answer:"
...
...
lm_eval/tasks/mgsm/utils.py
View file @
d27c0c08
import
yaml
import
argparse
import
argparse
import
yaml
LANGUAGES
=
{
LANGUAGES
=
{
"bn"
:
{
# Bengali
"bn"
:
{
# Bengali
...
@@ -126,6 +127,7 @@ def add_regex_pattern(regex_pattern):
...
@@ -126,6 +127,7 @@ def add_regex_pattern(regex_pattern):
],
],
}
}
def
gen_lang_yamls
(
output_dir
:
str
,
overwrite
:
bool
,
mode
:
str
)
->
None
:
def
gen_lang_yamls
(
output_dir
:
str
,
overwrite
:
bool
,
mode
:
str
)
->
None
:
"""
"""
Generate a yaml file for each language.
Generate a yaml file for each language.
...
@@ -158,7 +160,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -158,7 +160,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
task_name
=
f
"mgsm_en_cot_
{
lang
}
"
task_name
=
f
"mgsm_en_cot_
{
lang
}
"
file_name
=
f
"
{
task_name
}
.yaml"
file_name
=
f
"
{
task_name
}
.yaml"
ANSWER_TO_SKIP
=
len
(
LANGUAGES
[
lang
][
"ANSWER"
])
+
1
ANSWER_TO_SKIP
=
len
(
LANGUAGES
[
lang
][
"ANSWER"
])
+
1
with
open
(
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
)
as
f
:
)
as
f
:
...
@@ -181,7 +183,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -181,7 +183,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
**
filter_list
,
**
filter_list
,
"generation_kwargs"
:
{
"generation_kwargs"
:
{
"until"
:
[
QUESTION
,
"</s>"
,
"<|im_end|>"
],
"until"
:
[
QUESTION
,
"</s>"
,
"<|im_end|>"
],
"do_sample"
:
False
"do_sample"
:
False
,
},
},
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
},
},
...
...
lm_eval/tasks/minerva_math/utils.py
View file @
d27c0c08
import
datasets
import
re
import
re
import
signal
import
signal
from
typing
import
Dict
,
List
,
Optional
import
datasets
from
lm_eval.utils
import
eval_logger
from
lm_eval.utils
import
eval_logger
from
typing
import
Optional
,
List
,
Dict
try
:
try
:
import
sympy
import
sympy
from
sympy.parsing.latex
import
parse_latex
from
sympy.parsing.latex
import
parse_latex
except
ModuleNotFoundError
:
except
ModuleNotFoundError
:
raise
Exception
(
raise
ModuleNotFoundError
(
"`sympy` is required for generating translation task prompt templates.
\
"`sympy` is required for generating translation task prompt templates.
\
please install sympy via pip install lm-eval[math] or pip install -e .[math]"
,
please install sympy via pip install lm-eval[math] or pip install -e .[math]"
,
)
)
...
...
lm_eval/tasks/mmlu/_generate_configs.py
View file @
d27c0c08
"""
"""
Take in a YAML, and output all "other" splits with this YAML
Take in a YAML, and output all "other" splits with this YAML
"""
"""
import
os
import
yaml
import
argparse
import
argparse
import
os
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
lm_eval.logger
import
eval_logger
from
lm_eval.logger
import
eval_logger
SUBJECTS
=
{
SUBJECTS
=
{
"abstract_algebra"
:
"stem"
,
"abstract_algebra"
:
"stem"
,
"anatomy"
:
"stem"
,
"anatomy"
:
"stem"
,
...
@@ -124,7 +125,6 @@ if __name__ == "__main__":
...
@@ -124,7 +125,6 @@ if __name__ == "__main__":
yaml
.
dump
(
yaml
.
dump
(
yaml_dict
,
yaml_dict
,
yaml_file
,
yaml_file
,
# width=float("inf"),
allow_unicode
=
True
,
allow_unicode
=
True
,
default_style
=
'"'
,
default_style
=
'"'
,
)
)
...
...
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
View file @
d27c0c08
import
re
import
re
import
sys
import
sys
import
unicodedata
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
from
lm_eval.filters.extraction
import
RegexFilter
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """
""" """
def
__init__
(
def
__init__
(
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
self
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
)
->
None
:
)
->
None
:
"""
"""
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match
=
convert_dict
[
match
]
match
=
convert_dict
[
match
]
return
match
return
match
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
punct_tbl
=
dict
.
fromkeys
(
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
'P'
))
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
"P"
)
)
def
filter_ignores
(
st
):
def
filter_ignores
(
st
):
if
self
.
regexes_to_ignore
is
not
None
:
if
self
.
regexes_to_ignore
is
not
None
:
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for
r
,
doc
in
zip
(
resps
,
docs
):
for
r
,
doc
in
zip
(
resps
,
docs
):
fallback_regexes
=
[]
fallback_regexes
=
[]
choice_to_alpha
=
{}
choice_to_alpha
=
{}
next_alpha
=
'A'
next_alpha
=
"A"
without_paren_fallback_regexes
=
[]
without_paren_fallback_regexes
=
[]
without_paren_to_target
=
{}
without_paren_to_target
=
{}
choices
=
doc
[
'
choices
'
]
choices
=
doc
[
"
choices
"
]
for
c
in
choices
:
for
c
in
choices
:
m
=
filter_ignores
(
c
.
strip
())
m
=
filter_ignores
(
c
.
strip
())
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
fallback_regex
=
re
.
compile
(
'|'
.
join
(
fallback_regexes
))
fallback_regex
=
re
.
compile
(
"|"
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
'|'
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
"|"
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
filtered
=
[]
filtered
=
[]
for
resp
in
r
:
for
resp
in
r
:
match
=
find_match
(
self
.
regex
,
resp
)
match
=
find_match
(
self
.
regex
,
resp
)
if
not
match
:
if
not
match
:
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
if
not
match
:
if
not
match
:
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
if
not
match
:
if
not
match
:
match
=
self
.
fallback
match
=
self
.
fallback
filtered
.
append
(
match
)
filtered
.
append
(
match
)
...
...
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
View file @
d27c0c08
import
re
import
re
import
sys
import
sys
import
unicodedata
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
from
lm_eval.filters.extraction
import
RegexFilter
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """
""" """
def
__init__
(
def
__init__
(
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
self
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
)
->
None
:
)
->
None
:
"""
"""
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match
=
convert_dict
[
match
]
match
=
convert_dict
[
match
]
return
match
return
match
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
punct_tbl
=
dict
.
fromkeys
(
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
'P'
))
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
"P"
)
)
def
filter_ignores
(
st
):
def
filter_ignores
(
st
):
if
self
.
regexes_to_ignore
is
not
None
:
if
self
.
regexes_to_ignore
is
not
None
:
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for
r
,
doc
in
zip
(
resps
,
docs
):
for
r
,
doc
in
zip
(
resps
,
docs
):
fallback_regexes
=
[]
fallback_regexes
=
[]
choice_to_alpha
=
{}
choice_to_alpha
=
{}
next_alpha
=
'A'
next_alpha
=
"A"
without_paren_fallback_regexes
=
[]
without_paren_fallback_regexes
=
[]
without_paren_to_target
=
{}
without_paren_to_target
=
{}
choices
=
doc
[
'
choices
'
]
choices
=
doc
[
"
choices
"
]
for
c
in
choices
:
for
c
in
choices
:
m
=
filter_ignores
(
c
.
strip
())
m
=
filter_ignores
(
c
.
strip
())
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
fallback_regex
=
re
.
compile
(
'|'
.
join
(
fallback_regexes
))
fallback_regex
=
re
.
compile
(
"|"
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
'|'
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
"|"
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
filtered
=
[]
filtered
=
[]
for
resp
in
r
:
for
resp
in
r
:
match
=
find_match
(
self
.
regex
,
resp
)
match
=
find_match
(
self
.
regex
,
resp
)
if
not
match
:
if
not
match
:
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
if
not
match
:
if
not
match
:
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
if
not
match
:
if
not
match
:
match
=
self
.
fallback
match
=
self
.
fallback
filtered
.
append
(
match
)
filtered
.
append
(
match
)
...
...
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
View file @
d27c0c08
import
yaml
import
datasets
import
datasets
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
...
...
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
View file @
d27c0c08
import
yaml
import
datasets
import
datasets
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
...
...
lm_eval/tasks/okapi/arc_multilingual/utils.py
View file @
d27c0c08
import
datasets
import
re
import
re
import
datasets
def
preprocess
(
text
):
def
preprocess
(
text
):
if
text
is
None
:
if
text
is
None
:
...
@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
...
@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
out_doc
=
{
out_doc
=
{
"id"
:
doc
[
"id"
],
"id"
:
doc
[
"id"
],
"query"
:
"Question: "
+
preprocess
(
doc
[
"instruction"
])
+
"
\n
Answer:"
,
"query"
:
"Question: "
+
preprocess
(
doc
[
"instruction"
])
+
"
\n
Answer:"
,
"choices"
:
[
preprocess
(
doc
[
'option_a'
]),
preprocess
(
doc
[
'option_b'
]),
preprocess
(
doc
[
'option_c'
]),
preprocess
(
doc
[
'option_d'
]),
preprocess
(
doc
[
'option_e'
])],
"choices"
:
[
preprocess
(
doc
[
"option_a"
]),
preprocess
(
doc
[
"option_b"
]),
preprocess
(
doc
[
"option_c"
]),
preprocess
(
doc
[
"option_d"
]),
preprocess
(
doc
[
"option_e"
]),
],
"gold"
:
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
].
index
(
doc
[
"answer"
]),
"gold"
:
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
].
index
(
doc
[
"answer"
]),
}
}
return
out_doc
return
out_doc
...
...
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
View file @
d27c0c08
import
datasets
import
re
import
re
import
datasets
def
preprocess
(
text
):
def
preprocess
(
text
):
text
=
text
.
strip
()
text
=
text
.
strip
()
...
...
lm_eval/tasks/okapi/mmlu_multilingual/_generate_configs.py
View file @
d27c0c08
import
yaml
import
datasets
import
datasets
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
...
@@ -10,8 +9,12 @@ def main() -> None:
...
@@ -10,8 +9,12 @@ def main() -> None:
# Removed hy and sk subdataset because the original dataset is broken
# Removed hy and sk subdataset because the original dataset is broken
# I created this PR https://huggingface.co/datasets/alexandrainst/m_mmlu/discussions/3
# I created this PR https://huggingface.co/datasets/alexandrainst/m_mmlu/discussions/3
# on the dataset for the authors, in case it will be accepeted the filter can be removed
# on the dataset for the authors, in case it will be accepeted the filter can be removed
keys_without_hy_sk
=
list
(
filter
(
lambda
k
:
(
'hy'
not
in
k
and
'sk'
not
in
k
),
keys_without_hy_sk
=
list
(
# noqa: F841
datasets
.
get_dataset_infos
(
dataset_path
).
keys
()))
filter
(
lambda
k
:
(
"hy"
not
in
k
and
"sk"
not
in
k
),
datasets
.
get_dataset_infos
(
dataset_path
).
keys
(),
)
)
for
task
in
tqdm
():
for
task
in
tqdm
():
file_name
=
f
"m_mmlu_
{
task
}
.yaml"
file_name
=
f
"m_mmlu_
{
task
}
.yaml"
...
@@ -29,5 +32,6 @@ def main() -> None:
...
@@ -29,5 +32,6 @@ def main() -> None:
except
FileExistsError
:
except
FileExistsError
:
pass
pass
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
()
main
()
lm_eval/tasks/okapi/truthfulqa_multilingual/utils.py
View file @
d27c0c08
import
datasets
import
re
import
re
import
datasets
import
numpy
as
np
import
numpy
as
np
QA_PROMPT
=
(
QA_PROMPT
=
(
"Q: What is human life expectancy in the United States?
\n
"
"Q: What is human life expectancy in the United States?
\n
"
"A: Human life expectancy in the United States is 78 years.
\n\n
"
"A: Human life expectancy in the United States is 78 years.
\n\n
"
...
@@ -17,6 +19,7 @@ QA_PROMPT = (
...
@@ -17,6 +19,7 @@ QA_PROMPT = (
"A: The 1992 Olympics were held in Barcelona, Spain."
"A: The 1992 Olympics were held in Barcelona, Spain."
)
)
def
preprocess
(
text
):
def
preprocess
(
text
):
if
text
is
None
:
if
text
is
None
:
return
" "
return
" "
...
...
lm_eval/tasks/qasper/metrics.py
View file @
d27c0c08
import
re
import
re
import
string
import
string
from
collections
import
Counter
from
collections
import
Counter
...
...
lm_eval/tasks/qasper/utils.py
View file @
d27c0c08
from
datasets
import
Dataset
from
functools
import
partial
from
functools
import
partial
from
datasets
import
Dataset
def
process_docs
(
dataset
,
set_answer_type
=
"bool"
):
def
process_docs
(
dataset
,
set_answer_type
=
"bool"
):
FEATURES
=
[
"title"
,
"abstract"
,
"question"
,
"answer"
,
"answer_type"
]
FEATURES
=
[
"title"
,
"abstract"
,
"question"
,
"answer"
,
"answer_type"
]
...
...
lm_eval/tasks/realtoxicityprompts/metric.py
View file @
d27c0c08
import
os
import
json
import
json
import
requests
import
os
import
numpy
as
np
import
numpy
as
np
import
requests
from
lm_eval.utils
import
eval_logger
from
lm_eval.utils
import
eval_logger
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment