Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
90ad5db7
Commit
90ad5db7
authored
Mar 01, 2024
by
lintangsutawika
Browse files
merged main
parents
f692caa9
b177c82c
Changes
484
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
364 additions
and
53 deletions
+364
-53
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
+24
-0
lm_eval/tasks/mgsm/utils.py
lm_eval/tasks/mgsm/utils.py
+30
-9
lm_eval/tasks/minerva_math/utils.py
lm_eval/tasks/minerva_math/utils.py
+6
-3
lm_eval/tasks/mmlu/_generate_configs.py
lm_eval/tasks/mmlu/_generate_configs.py
+3
-3
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
+25
-12
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
+25
-12
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
...model_written_evals/advanced_ai_risk/_generate_configs.py
+1
-2
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
...al/tasks/model_written_evals/persona/_generate_configs.py
+1
-2
lm_eval/tasks/okapi/arc_multilingual/utils.py
lm_eval/tasks/okapi/arc_multilingual/utils.py
+9
-2
No files found.
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[5+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\n逐步解答:"}}{%
else
%}{{"问题:
"+question+"\n逐步解答:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_zh_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nধাপে
ধাপে
উত্তর:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nধাপে
ধাপে
উত্তর:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
প্রশ্ন:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_bn
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[29:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nSchritt-für-Schritt-Antwort:"}}{%
else
%}{{"Frage:
"+question+"\nSchritt-für-Schritt-Antwort:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
Die Antwort lautet (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Frage:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_de
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[21:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_en
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[23:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta
paso
a
paso:"}}{%
else
%}{{"Pregunta:
"+question+"\nRespuesta
paso
a
paso:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
La respuesta es (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Pregunta:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_es
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[26:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
étape
par
étape
:"}}{%
else
%}{{"Question
:
"+question+"\nRéponse
étape
par
étape
:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
La réponse est (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question
:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_fr
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[11:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nステップごとの答え:"}}{%
else
%}{{"問題:
"+question+"\nステップごとの答え:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
答えは(\-?[0-9\.\,]+)です。
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
問題:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_ja
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nПошаговоерешение:"}}{%
else
%}{{"Задача:
"+question+"\nПошаговоерешение:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
Ответ — (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Задача:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_ru
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
else
%}{{"Swali:
"+question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
Jibu ni (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Swali:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_sw
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[19:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nదశలవారీగా
సమాధానం:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nదశలవారీగా
సమాధానం:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
సమాధానం (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
ప్రశ్న:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_te
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nคำตอบทีละขั้นตอน:"}}{%
else
%}{{"โจทย์:
"+question+"\nคำตอบทีละขั้นตอน:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
คำตอบคือ (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
โจทย์:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_th
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\n逐步解答:"}}{%
else
%}{{"问题:
"+question+"\n逐步解答:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
答案是 (\-?[0-9\.\,]+)。
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
问题:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_zh
lm_eval/tasks/mgsm/utils.py
View file @
90ad5db7
import
yaml
import
argparse
import
yaml
LANGUAGES
=
{
"bn"
:
{
# Bengali
...
...
@@ -99,11 +100,24 @@ def add_regex_pattern(regex_pattern):
return
{
"filter_list"
:
[
{
"name"
:
"get-answer"
,
"name"
:
"strict-match"
,
"filter"
:
[
{
"function"
:
"regex"
,
"regex_pattern"
:
f
"""
{
regex_pattern
}
"""
,
},
{
"function"
:
"take_first"
,
},
],
},
{
"name"
:
"flexible-extract"
,
"filter"
:
[
{
"function"
:
"regex"
,
"regex_pattern"
:
regex_pattern
,
"regex_pattern"
:
"""(-?[$0-9.,]{2,})|(-?[0-9]+)"""
,
"group_select"
:
-
1
,
},
{
"function"
:
"take_first"
,
...
...
@@ -128,23 +142,25 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
yaml_template
=
"cot_yaml"
filter_list
=
{}
DELIMITER
=
None
if
mode
==
"direct"
:
ANSWER
=
LANGUAGES
[
lang
][
"DIRECT"
]
REGEX
=
None
task_name
=
f
"mgsm_
{
lang
}
_direct
"
task_name
=
f
"mgsm_
direct_
{
lang
}
"
yaml_template
=
"direct_yaml"
elif
mode
==
"native-cot"
:
ANSWER
=
LANGUAGES
[
lang
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
task_name
=
f
"mgsm_
{
lang
}
_
native
-
cot"
task_name
=
f
"mgsm_native
_
cot
_
{
lang
}
"
filter_list
=
add_regex_pattern
(
REGEX
)
DELIMITER
=
""
if
lang
in
[
"zh"
,
"ja"
]
else
None
elif
mode
==
"en-cot"
:
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
task_name
=
f
"mgsm_
{
lang
}
_en-cot
"
task_name
=
f
"mgsm_
en_cot_
{
lang
}
"
file_name
=
f
"
{
task_name
}
.yaml"
ANSWER_TO_SKIP
=
len
(
LANGUAGES
[
lang
][
"ANSWER"
])
+
1
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
)
as
f
:
...
...
@@ -153,18 +169,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
{
"include"
:
yaml_template
,
"dataset_name"
:
lang
,
"task"
:
f
"
mgsm_
{
lang
}
_direct
"
,
"task"
:
f
"
{
task_name
}
"
,
"doc_to_text"
:
f
"""{{% if answer is not none %}}"""
f
"""{{{{question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{% else %}}"""
f
"""{{{{"
{
QUESTION
}
"+question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{% endif %}}"""
,
"doc_to_target"
:
f
"""{{% if answer is not none %}}"""
f
"""{{{{answer[
{
len
(
ANSWER
)
}
+1
]}}}}"""
f
"""{{{{answer[
{
ANSWER
_TO_SKIP
}
:
]}}}}"""
f
"""{{% else %}}"""
f
"""{{{{answer_number|string}}}}"""
f
"""{{% endif %}}"""
,
**
filter_list
,
"generation_kwargs"
:
{
"until"
:
[
QUESTION
,
"</s>"
,
"<|im_end|>"
],
"do_sample"
:
False
,
},
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
},
f
,
allow_unicode
=
True
,
...
...
lm_eval/tasks/minerva_math/utils.py
View file @
90ad5db7
import
datasets
import
re
import
signal
from
typing
import
Dict
,
List
,
Optional
import
datasets
from
lm_eval.utils
import
eval_logger
from
typing
import
Optional
,
List
,
Dict
try
:
import
sympy
from
sympy.parsing.latex
import
parse_latex
except
ModuleNotFoundError
:
raise
Exception
(
raise
ModuleNotFoundError
(
"`sympy` is required for generating translation task prompt templates.
\
please install sympy via pip install lm-eval[math] or pip install -e .[math]"
,
)
...
...
lm_eval/tasks/mmlu/_generate_configs.py
View file @
90ad5db7
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import
os
import
yaml
import
argparse
import
os
import
yaml
from
tqdm
import
tqdm
from
lm_eval.logger
import
eval_logger
SUBJECTS
=
{
"abstract_algebra"
:
"stem"
,
"anatomy"
:
"stem"
,
...
...
@@ -124,7 +125,6 @@ if __name__ == "__main__":
yaml
.
dump
(
yaml_dict
,
yaml_file
,
# width=float("inf"),
allow_unicode
=
True
,
default_style
=
'"'
,
)
...
...
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
View file @
90ad5db7
import
re
import
sys
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
...
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """
def
__init__
(
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
)
->
None
:
"""
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match
=
convert_dict
[
match
]
return
match
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
'P'
))
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
"P"
)
)
def
filter_ignores
(
st
):
if
self
.
regexes_to_ignore
is
not
None
:
...
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for
r
,
doc
in
zip
(
resps
,
docs
):
fallback_regexes
=
[]
choice_to_alpha
=
{}
next_alpha
=
'A'
next_alpha
=
"A"
without_paren_fallback_regexes
=
[]
without_paren_to_target
=
{}
choices
=
doc
[
'
choices
'
]
choices
=
doc
[
"
choices
"
]
for
c
in
choices
:
m
=
filter_ignores
(
c
.
strip
())
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
...
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
fallback_regex
=
re
.
compile
(
'|'
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
'|'
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
fallback_regex
=
re
.
compile
(
"|"
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
"|"
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
filtered
=
[]
for
resp
in
r
:
match
=
find_match
(
self
.
regex
,
resp
)
if
not
match
:
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
if
not
match
:
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
if
not
match
:
match
=
self
.
fallback
filtered
.
append
(
match
)
...
...
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
View file @
90ad5db7
import
re
import
sys
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
...
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """
def
__init__
(
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
)
->
None
:
"""
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match
=
convert_dict
[
match
]
return
match
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
'P'
))
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
"P"
)
)
def
filter_ignores
(
st
):
if
self
.
regexes_to_ignore
is
not
None
:
...
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for
r
,
doc
in
zip
(
resps
,
docs
):
fallback_regexes
=
[]
choice_to_alpha
=
{}
next_alpha
=
'A'
next_alpha
=
"A"
without_paren_fallback_regexes
=
[]
without_paren_to_target
=
{}
choices
=
doc
[
'
choices
'
]
choices
=
doc
[
"
choices
"
]
for
c
in
choices
:
m
=
filter_ignores
(
c
.
strip
())
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
...
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
fallback_regex
=
re
.
compile
(
'|'
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
'|'
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
fallback_regex
=
re
.
compile
(
"|"
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
"|"
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
filtered
=
[]
for
resp
in
r
:
match
=
find_match
(
self
.
regex
,
resp
)
if
not
match
:
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
if
not
match
:
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
if
not
match
:
match
=
self
.
fallback
filtered
.
append
(
match
)
...
...
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
View file @
90ad5db7
import
yaml
import
datasets
import
yaml
from
tqdm
import
tqdm
...
...
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
View file @
90ad5db7
import
yaml
import
datasets
import
yaml
from
tqdm
import
tqdm
...
...
lm_eval/tasks/okapi/arc_multilingual/utils.py
View file @
90ad5db7
import
datasets
import
re
import
datasets
def
preprocess
(
text
):
if
text
is
None
:
...
...
@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
out_doc
=
{
"id"
:
doc
[
"id"
],
"query"
:
"Question: "
+
preprocess
(
doc
[
"instruction"
])
+
"
\n
Answer:"
,
"choices"
:
[
preprocess
(
doc
[
'option_a'
]),
preprocess
(
doc
[
'option_b'
]),
preprocess
(
doc
[
'option_c'
]),
preprocess
(
doc
[
'option_d'
]),
preprocess
(
doc
[
'option_e'
])],
"choices"
:
[
preprocess
(
doc
[
"option_a"
]),
preprocess
(
doc
[
"option_b"
]),
preprocess
(
doc
[
"option_c"
]),
preprocess
(
doc
[
"option_d"
]),
preprocess
(
doc
[
"option_e"
]),
],
"gold"
:
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
].
index
(
doc
[
"answer"
]),
}
return
out_doc
...
...
Prev
1
…
16
17
18
19
20
21
22
23
24
25
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment