Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
8371662c
"app/vscode:/vscode.git/clone" did not exist on "ed6abba75a5de5e9934187cca1fe115e40584e76"
Unverified
Commit
8371662c
authored
Feb 24, 2024
by
thnkinbtfly
Committed by
GitHub
Feb 23, 2024
Browse files
update parsing logic of mgsm following gsm8k (#1462)
parent
75ac1f47
Changes
37
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
188 additions
and
18 deletions
+188
-18
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml
+6
-0
lm_eval/tasks/mgsm/native_cot/cot_yaml
lm_eval/tasks/mgsm/native_cot/cot_yaml
+1
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
+13
-2
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
+13
-1
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
+13
-2
lm_eval/tasks/mgsm/utils.py
lm_eval/tasks/mgsm/utils.py
+20
-4
No files found.
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml
View file @
8371662c
...
@@ -2,5 +2,11 @@
...
@@ -2,5 +2,11 @@
dataset_name
:
sw
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Swali:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Swali:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Swali:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_en_cot_sw
task
:
mgsm_en_cot_sw
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml
View file @
8371662c
...
@@ -2,5 +2,11 @@
...
@@ -2,5 +2,11 @@
dataset_name
:
te
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[19:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[19:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
ప్రశ్న:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_en_cot_te
task
:
mgsm_en_cot_te
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml
View file @
8371662c
...
@@ -2,5 +2,11 @@
...
@@ -2,5 +2,11 @@
dataset_name
:
th
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"โจทย์:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"โจทย์:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
โจทย์:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_en_cot_th
task
:
mgsm_en_cot_th
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml
View file @
8371662c
...
@@ -2,5 +2,11 @@
...
@@ -2,5 +2,11 @@
dataset_name
:
zh
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"问题:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"问题:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
问题:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_en_cot_zh
task
:
mgsm_en_cot_zh
lm_eval/tasks/mgsm/native_cot/cot_yaml
View file @
8371662c
...
@@ -28,4 +28,4 @@ filter_list:
...
@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
- function: "take_first"
metadata:
metadata:
version:
2
.0
version:
3
.0
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
প্রশ্ন:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_bn
task
:
mgsm_native_cot_bn
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
Die Antwort lautet (\-?[0-9\.\,]+)
regex_pattern
:
Die Antwort lautet (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Frage:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_de
task
:
mgsm_native_cot_de
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_en
task
:
mgsm_native_cot_en
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
La respuesta es (\-?[0-9\.\,]+)
regex_pattern
:
La respuesta es (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Pregunta:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_es
task
:
mgsm_native_cot_es
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
La réponse est (\-?[0-9\.\,]+)
regex_pattern
:
La réponse est (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question
:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_fr
task
:
mgsm_native_cot_fr
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
View file @
8371662c
...
@@ -7,7 +7,18 @@ filter_list:
...
@@ -7,7 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
答えは(\-?[0-9\.\,]+)です。
regex_pattern
:
答えは(\-?[0-9\.\,]+)です。
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
問題:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
target_delimiter
:
"
"
task
:
mgsm_native_cot_ja
task
:
mgsm_native_cot_ja
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
Ответ — (\-?[0-9\.\,]+)
regex_pattern
:
Ответ — (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Задача:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_ru
task
:
mgsm_native_cot_ru
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
Jibu ni (\-?[0-9\.\,]+)
regex_pattern
:
Jibu ni (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
Swali:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_sw
task
:
mgsm_native_cot_sw
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
సమాధానం (\-?[0-9\.\,]+)
regex_pattern
:
సమాధానం (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
ప్రశ్న:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_te
task
:
mgsm_native_cot_te
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
View file @
8371662c
...
@@ -7,6 +7,18 @@ filter_list:
...
@@ -7,6 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
คำตอบคือ (\-?[0-9\.\,]+)
regex_pattern
:
คำตอบคือ (\-?[0-9\.\,]+)
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
โจทย์:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_native_cot_th
task
:
mgsm_native_cot_th
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
View file @
8371662c
...
@@ -7,7 +7,18 @@ filter_list:
...
@@ -7,7 +7,18 @@ filter_list:
-
function
:
regex
-
function
:
regex
regex_pattern
:
答案是 (\-?[0-9\.\,]+)。
regex_pattern
:
答案是 (\-?[0-9\.\,]+)。
-
function
:
take_first
-
function
:
take_first
name
:
get-answer
name
:
strict-match
-
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
(-?[$0-9.,]{2,})|(-?[0-9]+)
-
function
:
take_first
name
:
flexible-extract
generation_kwargs
:
do_sample
:
false
until
:
-
'
问题:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
target_delimiter
:
"
"
task
:
mgsm_native_cot_zh
task
:
mgsm_native_cot_zh
lm_eval/tasks/mgsm/utils.py
View file @
8371662c
...
@@ -99,11 +99,24 @@ def add_regex_pattern(regex_pattern):
...
@@ -99,11 +99,24 @@ def add_regex_pattern(regex_pattern):
return
{
return
{
"filter_list"
:
[
"filter_list"
:
[
{
{
"name"
:
"
get-answer
"
,
"name"
:
"
strict-match
"
,
"filter"
:
[
"filter"
:
[
{
{
"function"
:
"regex"
,
"function"
:
"regex"
,
"regex_pattern"
:
regex_pattern
,
"regex_pattern"
:
f
"""
{
regex_pattern
}
"""
,
},
{
"function"
:
"take_first"
,
},
],
},
{
"name"
:
"flexible-extract"
,
"filter"
:
[
{
"function"
:
"regex"
,
"regex_pattern"
:
"""(-?[$0-9.,]{2,})|(-?[0-9]+)"""
,
"group_select"
:
-
1
,
},
},
{
{
"function"
:
"take_first"
,
"function"
:
"take_first"
,
...
@@ -113,7 +126,6 @@ def add_regex_pattern(regex_pattern):
...
@@ -113,7 +126,6 @@ def add_regex_pattern(regex_pattern):
],
],
}
}
def
gen_lang_yamls
(
output_dir
:
str
,
overwrite
:
bool
,
mode
:
str
)
->
None
:
def
gen_lang_yamls
(
output_dir
:
str
,
overwrite
:
bool
,
mode
:
str
)
->
None
:
"""
"""
Generate a yaml file for each language.
Generate a yaml file for each language.
...
@@ -139,7 +151,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -139,7 +151,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
task_name
=
f
"mgsm_native_cot_
{
lang
}
"
task_name
=
f
"mgsm_native_cot_
{
lang
}
"
filter_list
=
add_regex_pattern
(
REGEX
)
filter_list
=
add_regex_pattern
(
REGEX
)
DELIMITER
=
""
if
lang
in
[
"zh"
,
"ja"
]
DELIMITER
=
""
if
lang
in
[
"zh"
,
"ja"
]
else
None
elif
mode
==
"en-cot"
:
elif
mode
==
"en-cot"
:
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
...
@@ -167,6 +179,10 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -167,6 +179,10 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
f
"""{{{{answer_number|string}}}}"""
f
"""{{{{answer_number|string}}}}"""
f
"""{{% endif %}}"""
,
f
"""{{% endif %}}"""
,
**
filter_list
,
**
filter_list
,
"generation_kwargs"
:
{
"until"
:
[
QUESTION
,
"</s>"
,
"<|im_end|>"
],
"do_sample"
:
False
},
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
},
},
f
,
f
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment