Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
90ad5db7
Commit
90ad5db7
authored
Mar 01, 2024
by
lintangsutawika
Browse files
merged main
parents
f692caa9
b177c82c
Changes
484
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
364 additions
and
53 deletions
+364
-53
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
+24
-0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
+24
-0
lm_eval/tasks/mgsm/utils.py
lm_eval/tasks/mgsm/utils.py
+30
-9
lm_eval/tasks/minerva_math/utils.py
lm_eval/tasks/minerva_math/utils.py
+6
-3
lm_eval/tasks/mmlu/_generate_configs.py
lm_eval/tasks/mmlu/_generate_configs.py
+3
-3
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
+25
-12
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
+25
-12
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
...model_written_evals/advanced_ai_risk/_generate_configs.py
+1
-2
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
...al/tasks/model_written_evals/persona/_generate_configs.py
+1
-2
lm_eval/tasks/okapi/arc_multilingual/utils.py
lm_eval/tasks/okapi/arc_multilingual/utils.py
+9
-2
No files found.
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[5+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\n逐步解答:"}}{%
else
%}{{"问题:
"+question+"\n逐步解答:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_zh_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nধাপে
ধাপে
উত্তর:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nধাপে
ধাপে
উত্তর:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
প্রশ্ন:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_bn
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[29:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nSchritt-für-Schritt-Antwort:"}}{%
else
%}{{"Frage:
"+question+"\nSchritt-für-Schritt-Antwort:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
Die Antwort lautet (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Frage:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_de
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[21:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_en
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[23:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta
paso
a
paso:"}}{%
else
%}{{"Pregunta:
"+question+"\nRespuesta
paso
a
paso:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
La respuesta es (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Pregunta:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_es
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[26:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
étape
par
étape
:"}}{%
else
%}{{"Question
:
"+question+"\nRéponse
étape
par
étape
:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
La réponse est (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question
:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_fr
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[11:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nステップごとの答え:"}}{%
else
%}{{"問題:
"+question+"\nステップごとの答え:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
答えは(\-?[0-9\.\,]+)です。
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
問題:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_ja
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nПошаговоерешение:"}}{%
else
%}{{"Задача:
"+question+"\nПошаговоерешение:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
Ответ — (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Задача:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_ru
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
else
%}{{"Swali:
"+question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
Jibu ni (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Swali:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_sw
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[19:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nదశలవారీగా
సమాధానం:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nదశలవారీగా
సమాధానం:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
సమాధానం (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
ప్రశ్న:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_te
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nคำตอบทีละขั้นตอน:"}}{%
else
%}{{"โจทย์:
"+question+"\nคำตอบทีละขั้นตอน:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
คำตอบคือ (\-?[0-9\.\,]+)
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
โจทย์:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_th
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
0 → 100644
View file @
90ad5db7
# Generated by utils.py
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\n逐步解答:"}}{%
else
%}{{"问题:
"+question+"\n逐步解答:"}}{%
endif
%}'
filter_list
:
-
filter
:
-
function
:
regex
regex_pattern
:
答案是 (\-?[0-9\.\,]+)。
-
function
:
take_first
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
问题:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
task
:
mgsm_native_cot_zh
lm_eval/tasks/mgsm/utils.py
View file @
90ad5db7
import
yaml
import
argparse
import
argparse
import
yaml
LANGUAGES
=
{
LANGUAGES
=
{
"bn"
:
{
# Bengali
"bn"
:
{
# Bengali
...
@@ -99,11 +100,24 @@ def add_regex_pattern(regex_pattern):
...
@@ -99,11 +100,24 @@ def add_regex_pattern(regex_pattern):
return
{
return
{
"filter_list"
:
[
"filter_list"
:
[
{
{
"name"
:
"get-answer"
,
"name"
:
"strict-match"
,
"filter"
:
[
{
"function"
:
"regex"
,
"regex_pattern"
:
f
"""
{
regex_pattern
}
"""
,
},
{
"function"
:
"take_first"
,
},
],
},
{
"name"
:
"flexible-extract"
,
"filter"
:
[
"filter"
:
[
{
{
"function"
:
"regex"
,
"function"
:
"regex"
,
"regex_pattern"
:
regex_pattern
,
"regex_pattern"
:
"""(-?[$0-9.,]{2,})|(-?[0-9]+)"""
,
"group_select"
:
-
1
,
},
},
{
{
"function"
:
"take_first"
,
"function"
:
"take_first"
,
...
@@ -128,23 +142,25 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -128,23 +142,25 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
yaml_template
=
"cot_yaml"
yaml_template
=
"cot_yaml"
filter_list
=
{}
filter_list
=
{}
DELIMITER
=
None
if
mode
==
"direct"
:
if
mode
==
"direct"
:
ANSWER
=
LANGUAGES
[
lang
][
"DIRECT"
]
ANSWER
=
LANGUAGES
[
lang
][
"DIRECT"
]
REGEX
=
None
REGEX
=
None
task_name
=
f
"mgsm_
{
lang
}
_direct
"
task_name
=
f
"mgsm_
direct_
{
lang
}
"
yaml_template
=
"direct_yaml"
yaml_template
=
"direct_yaml"
elif
mode
==
"native-cot"
:
elif
mode
==
"native-cot"
:
ANSWER
=
LANGUAGES
[
lang
][
"ANSWER"
]
ANSWER
=
LANGUAGES
[
lang
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
task_name
=
f
"mgsm_
{
lang
}
_
native
-
cot"
task_name
=
f
"mgsm_native
_
cot
_
{
lang
}
"
filter_list
=
add_regex_pattern
(
REGEX
)
filter_list
=
add_regex_pattern
(
REGEX
)
DELIMITER
=
""
if
lang
in
[
"zh"
,
"ja"
]
else
None
elif
mode
==
"en-cot"
:
elif
mode
==
"en-cot"
:
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
task_name
=
f
"mgsm_
{
lang
}
_en-cot
"
task_name
=
f
"mgsm_
en_cot_
{
lang
}
"
file_name
=
f
"
{
task_name
}
.yaml"
file_name
=
f
"
{
task_name
}
.yaml"
ANSWER_TO_SKIP
=
len
(
LANGUAGES
[
lang
][
"ANSWER"
])
+
1
with
open
(
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
)
as
f
:
)
as
f
:
...
@@ -153,18 +169,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -153,18 +169,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
{
{
"include"
:
yaml_template
,
"include"
:
yaml_template
,
"dataset_name"
:
lang
,
"dataset_name"
:
lang
,
"task"
:
f
"
mgsm_
{
lang
}
_direct
"
,
"task"
:
f
"
{
task_name
}
"
,
"doc_to_text"
:
f
"""{{% if answer is not none %}}"""
"doc_to_text"
:
f
"""{{% if answer is not none %}}"""
f
"""{{{{question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{{{question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{% else %}}"""
f
"""{{% else %}}"""
f
"""{{{{"
{
QUESTION
}
"+question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{{{"
{
QUESTION
}
"+question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{% endif %}}"""
,
f
"""{{% endif %}}"""
,
"doc_to_target"
:
f
"""{{% if answer is not none %}}"""
"doc_to_target"
:
f
"""{{% if answer is not none %}}"""
f
"""{{{{answer[
{
len
(
ANSWER
)
}
+1
]}}}}"""
f
"""{{{{answer[
{
ANSWER
_TO_SKIP
}
:
]}}}}"""
f
"""{{% else %}}"""
f
"""{{% else %}}"""
f
"""{{{{answer_number|string}}}}"""
f
"""{{{{answer_number|string}}}}"""
f
"""{{% endif %}}"""
,
f
"""{{% endif %}}"""
,
**
filter_list
,
**
filter_list
,
"generation_kwargs"
:
{
"until"
:
[
QUESTION
,
"</s>"
,
"<|im_end|>"
],
"do_sample"
:
False
,
},
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
},
},
f
,
f
,
allow_unicode
=
True
,
allow_unicode
=
True
,
...
...
lm_eval/tasks/minerva_math/utils.py
View file @
90ad5db7
import
datasets
import
re
import
re
import
signal
import
signal
from
typing
import
Dict
,
List
,
Optional
import
datasets
from
lm_eval.utils
import
eval_logger
from
lm_eval.utils
import
eval_logger
from
typing
import
Optional
,
List
,
Dict
try
:
try
:
import
sympy
import
sympy
from
sympy.parsing.latex
import
parse_latex
from
sympy.parsing.latex
import
parse_latex
except
ModuleNotFoundError
:
except
ModuleNotFoundError
:
raise
Exception
(
raise
ModuleNotFoundError
(
"`sympy` is required for generating translation task prompt templates.
\
"`sympy` is required for generating translation task prompt templates.
\
please install sympy via pip install lm-eval[math] or pip install -e .[math]"
,
please install sympy via pip install lm-eval[math] or pip install -e .[math]"
,
)
)
...
...
lm_eval/tasks/mmlu/_generate_configs.py
View file @
90ad5db7
"""
"""
Take in a YAML, and output all "other" splits with this YAML
Take in a YAML, and output all "other" splits with this YAML
"""
"""
import
os
import
yaml
import
argparse
import
argparse
import
os
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
lm_eval.logger
import
eval_logger
from
lm_eval.logger
import
eval_logger
SUBJECTS
=
{
SUBJECTS
=
{
"abstract_algebra"
:
"stem"
,
"abstract_algebra"
:
"stem"
,
"anatomy"
:
"stem"
,
"anatomy"
:
"stem"
,
...
@@ -124,7 +125,6 @@ if __name__ == "__main__":
...
@@ -124,7 +125,6 @@ if __name__ == "__main__":
yaml
.
dump
(
yaml
.
dump
(
yaml_dict
,
yaml_dict
,
yaml_file
,
yaml_file
,
# width=float("inf"),
allow_unicode
=
True
,
allow_unicode
=
True
,
default_style
=
'"'
,
default_style
=
'"'
,
)
)
...
...
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
View file @
90ad5db7
import
re
import
re
import
sys
import
sys
import
unicodedata
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
from
lm_eval.filters.extraction
import
RegexFilter
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """
""" """
def
__init__
(
def
__init__
(
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
self
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
)
->
None
:
)
->
None
:
"""
"""
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match
=
convert_dict
[
match
]
match
=
convert_dict
[
match
]
return
match
return
match
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
punct_tbl
=
dict
.
fromkeys
(
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
'P'
))
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
"P"
)
)
def
filter_ignores
(
st
):
def
filter_ignores
(
st
):
if
self
.
regexes_to_ignore
is
not
None
:
if
self
.
regexes_to_ignore
is
not
None
:
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for
r
,
doc
in
zip
(
resps
,
docs
):
for
r
,
doc
in
zip
(
resps
,
docs
):
fallback_regexes
=
[]
fallback_regexes
=
[]
choice_to_alpha
=
{}
choice_to_alpha
=
{}
next_alpha
=
'A'
next_alpha
=
"A"
without_paren_fallback_regexes
=
[]
without_paren_fallback_regexes
=
[]
without_paren_to_target
=
{}
without_paren_to_target
=
{}
choices
=
doc
[
'
choices
'
]
choices
=
doc
[
"
choices
"
]
for
c
in
choices
:
for
c
in
choices
:
m
=
filter_ignores
(
c
.
strip
())
m
=
filter_ignores
(
c
.
strip
())
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
fallback_regex
=
re
.
compile
(
'|'
.
join
(
fallback_regexes
))
fallback_regex
=
re
.
compile
(
"|"
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
'|'
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
"|"
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
filtered
=
[]
filtered
=
[]
for
resp
in
r
:
for
resp
in
r
:
match
=
find_match
(
self
.
regex
,
resp
)
match
=
find_match
(
self
.
regex
,
resp
)
if
not
match
:
if
not
match
:
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
if
not
match
:
if
not
match
:
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
if
not
match
:
if
not
match
:
match
=
self
.
fallback
match
=
self
.
fallback
filtered
.
append
(
match
)
filtered
.
append
(
match
)
...
...
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
View file @
90ad5db7
import
re
import
re
import
sys
import
sys
import
unicodedata
import
unicodedata
from
lm_eval.filters.extraction
import
RegexFilter
from
lm_eval.filters.extraction
import
RegexFilter
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """
""" """
def
__init__
(
def
__init__
(
self
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
self
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
regex_pattern
:
str
=
r
"#### (\-?[0-9\.\,]+)"
,
group_select
=
0
,
fallback
:
str
=
"[invalid]"
,
ignore_case
=
False
,
ignore_punctuation
=
False
,
regexes_to_ignore
=
None
,
)
->
None
:
)
->
None
:
"""
"""
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match
=
convert_dict
[
match
]
match
=
convert_dict
[
match
]
return
match
return
match
punct_tbl
=
dict
.
fromkeys
(
i
for
i
in
range
(
sys
.
maxunicode
)
punct_tbl
=
dict
.
fromkeys
(
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
'P'
))
i
for
i
in
range
(
sys
.
maxunicode
)
if
unicodedata
.
category
(
chr
(
i
)).
startswith
(
"P"
)
)
def
filter_ignores
(
st
):
def
filter_ignores
(
st
):
if
self
.
regexes_to_ignore
is
not
None
:
if
self
.
regexes_to_ignore
is
not
None
:
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for
r
,
doc
in
zip
(
resps
,
docs
):
for
r
,
doc
in
zip
(
resps
,
docs
):
fallback_regexes
=
[]
fallback_regexes
=
[]
choice_to_alpha
=
{}
choice_to_alpha
=
{}
next_alpha
=
'A'
next_alpha
=
"A"
without_paren_fallback_regexes
=
[]
without_paren_fallback_regexes
=
[]
without_paren_to_target
=
{}
without_paren_to_target
=
{}
choices
=
doc
[
'
choices
'
]
choices
=
doc
[
"
choices
"
]
for
c
in
choices
:
for
c
in
choices
:
m
=
filter_ignores
(
c
.
strip
())
m
=
filter_ignores
(
c
.
strip
())
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
fallback_regexes
.
append
(
f
"
{
re
.
escape
(
m
)
}
"
)
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
...
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
without_paren_to_target
[
next_alpha
]
=
f
"(
{
next_alpha
}
)"
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
next_alpha
=
chr
(
ord
(
next_alpha
)
+
1
)
fallback_regex
=
re
.
compile
(
'|'
.
join
(
fallback_regexes
))
fallback_regex
=
re
.
compile
(
"|"
.
join
(
fallback_regexes
))
without_paren_fallback_regex
=
'|'
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
"|"
.
join
(
without_paren_fallback_regexes
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
without_paren_fallback_regex
=
re
.
compile
(
f
":[\s]*(
{
without_paren_fallback_regex
}
)"
)
filtered
=
[]
filtered
=
[]
for
resp
in
r
:
for
resp
in
r
:
match
=
find_match
(
self
.
regex
,
resp
)
match
=
find_match
(
self
.
regex
,
resp
)
if
not
match
:
if
not
match
:
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
match
=
find_match
(
fallback_regex
,
filter_ignores
(
resp
),
choice_to_alpha
)
if
not
match
:
if
not
match
:
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
match
=
find_match
(
without_paren_fallback_regex
,
resp
,
without_paren_to_target
)
if
not
match
:
if
not
match
:
match
=
self
.
fallback
match
=
self
.
fallback
filtered
.
append
(
match
)
filtered
.
append
(
match
)
...
...
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
View file @
90ad5db7
import
yaml
import
datasets
import
datasets
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
...
...
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
View file @
90ad5db7
import
yaml
import
datasets
import
datasets
import
yaml
from
tqdm
import
tqdm
from
tqdm
import
tqdm
...
...
lm_eval/tasks/okapi/arc_multilingual/utils.py
View file @
90ad5db7
import
datasets
import
re
import
re
import
datasets
def
preprocess
(
text
):
def
preprocess
(
text
):
if
text
is
None
:
if
text
is
None
:
...
@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
...
@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
out_doc
=
{
out_doc
=
{
"id"
:
doc
[
"id"
],
"id"
:
doc
[
"id"
],
"query"
:
"Question: "
+
preprocess
(
doc
[
"instruction"
])
+
"
\n
Answer:"
,
"query"
:
"Question: "
+
preprocess
(
doc
[
"instruction"
])
+
"
\n
Answer:"
,
"choices"
:
[
preprocess
(
doc
[
'option_a'
]),
preprocess
(
doc
[
'option_b'
]),
preprocess
(
doc
[
'option_c'
]),
preprocess
(
doc
[
'option_d'
]),
preprocess
(
doc
[
'option_e'
])],
"choices"
:
[
preprocess
(
doc
[
"option_a"
]),
preprocess
(
doc
[
"option_b"
]),
preprocess
(
doc
[
"option_c"
]),
preprocess
(
doc
[
"option_d"
]),
preprocess
(
doc
[
"option_e"
]),
],
"gold"
:
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
].
index
(
doc
[
"answer"
]),
"gold"
:
[
"A"
,
"B"
,
"C"
,
"D"
,
"E"
].
index
(
doc
[
"answer"
]),
}
}
return
out_doc
return
out_doc
...
...
Prev
1
…
16
17
18
19
20
21
22
23
24
25
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment