Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b8d1cef9
Unverified
Commit
b8d1cef9
authored
Sep 12, 2023
by
Lintang Sutawika
Committed by
GitHub
Sep 12, 2023
Browse files
Merge pull request #845 from EleutherAI/fix-mgsm
[Refactor] Fix MGSM
parents
f052d059
5ebe28eb
Changes
23
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
49 deletions
+44
-49
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_th.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_th.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
+0
-14
lm_eval/tasks/mgsm/utils.py
lm_eval/tasks/mgsm/utils.py
+44
-21
No files found.
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_th.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nคำตอบทีละขั้นตอน:"}}{%
else
%}{{"โจทย์:
"+question+"\nคำตอบทีละขั้นตอน:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_th_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[5+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\n逐步解答:"}}{%
else
%}{{"问题:
"+question+"\n逐步解答:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_zh_direct
lm_eval/tasks/mgsm/utils.py
View file @
b8d1cef9
...
@@ -4,16 +4,19 @@ import argparse
...
@@ -4,16 +4,19 @@ import argparse
LANGUAGES
=
{
LANGUAGES
=
{
"bn"
:
{
# Bengali
"bn"
:
{
# Bengali
# "QUESTION": "প্রশ্ন:",
"QUESTION"
:
"
\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8
:"
,
"QUESTION"
:
"
\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8
:"
,
# "ANSWER": "ধাপে ধাপে উত্তর:",
"ANSWER"
:
"
\u09a7\u09be\u09aa\u09c7
\u09a7\u09be\u09aa\u09c7
\u0989\u09a4\u09cd\u09a4\u09b0
:"
,
"ANSWER"
:
"
\u09a7\u09be\u09aa\u09c7
\u09a7\u09be\u09aa\u09c7
\u0989\u09a4\u09cd\u09a4\u09b0
:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"de"
:
{
# German
"de"
:
{
# German
"QUESTION"
:
"Frage:"
,
"QUESTION"
:
"Frage:"
,
# "ANSWER": "Schritt-für-Schritt-Antwort:",
"ANSWER"
:
"Schritt-f
\u00fc
r-Schritt-Antwort:"
,
"ANSWER"
:
"Schritt-f
\u00fc
r-Schritt-Antwort:"
,
"DIRECT"
:
"Antwort:"
,
"DIRECT"
:
"Antwort:"
,
"REGEX"
:
"
The answer is
(
\\
-?[0-9
\\
.
\\
,]+)"
,
"REGEX"
:
"
Die Antwort lautet
(
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"en"
:
{
# English
"en"
:
{
# English
"QUESTION"
:
"Question:"
,
"QUESTION"
:
"Question:"
,
...
@@ -24,50 +27,68 @@ LANGUAGES = {
...
@@ -24,50 +27,68 @@ LANGUAGES = {
"es"
:
{
# Spanish
"es"
:
{
# Spanish
"QUESTION"
:
"Pregunta:"
,
"QUESTION"
:
"Pregunta:"
,
"ANSWER"
:
"Respuesta paso a paso:"
,
"ANSWER"
:
"Respuesta paso a paso:"
,
"DIRECT"
:
"
Answer
:"
,
"DIRECT"
:
"
Respuesta
:"
,
"REGEX"
:
"
The answer i
s (
\\
-?[0-9
\\
.
\\
,]+)"
,
"REGEX"
:
"
La respuesta e
s (
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"fr"
:
{
# French
"fr"
:
{
# French
"QUESTION"
:
"Question :"
,
"QUESTION"
:
"Question :"
,
# "ANSWER": "Réponse étape par étape :"
"ANSWER"
:
"R
\u00e9
ponse
\u00e9
tape par
\u00e9
tape :"
,
"ANSWER"
:
"R
\u00e9
ponse
\u00e9
tape par
\u00e9
tape :"
,
"DIRECT"
:
"Answer:"
,
# "DIRECT": "Réponse :",
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
"DIRECT"
:
"R
\u00e9
ponse :"
,
# "REGEX": "La réponse est (\\-?[0-9\\.\\,]+)",
"REGEX"
:
"La r
\u00e9
ponse est (
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"ru"
:
{
# Russian
"ru"
:
{
# Russian
# "QUESTION": "Задача:",
"QUESTION"
:
"
\u0417\u0430\u0434\u0430\u0447\u0430
:"
,
"QUESTION"
:
"
\u0417\u0430\u0434\u0430\u0447\u0430
:"
,
# "ANSWER": "Пошаговоерешение:",
"ANSWER"
:
"
\u041f\u043e\u0448\u0430\u0433\u043e\u0432\u043e\u0435\u0440\u0435\u0448\u0435\u043d\u0438\u0435
:"
,
"ANSWER"
:
"
\u041f\u043e\u0448\u0430\u0433\u043e\u0432\u043e\u0435\u0440\u0435\u0448\u0435\u043d\u0438\u0435
:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
# "REGEX": "Ответ — (\\-?[0-9\\.\\,]+)",
"REGEX"
:
"
\u041e\u0442\u0432\u0435\u0442
\u2014
(
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"sw"
:
{
# Swahili
"sw"
:
{
# Swahili
"QUESTION"
:
"Swali:"
,
"QUESTION"
:
"Swali:"
,
"ANSWER"
:
"Jibu la Hatua kwa Hatua:"
,
"ANSWER"
:
"Jibu la Hatua kwa Hatua:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"
The answer is
(
\\
-?[0-9
\\
.
\\
,]+)"
,
"REGEX"
:
"
Jibu ni
(
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"te"
:
{
# Telugu
"te"
:
{
# Telugu
# "QUESTION": "ప్రశ్న:",
"QUESTION"
:
"
\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28
:"
,
"QUESTION"
:
"
\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28
:"
,
# "ANSWER": "దశలవారీగా సమాధానం:",
"ANSWER"
:
"
\u0c26\u0c36\u0c32\u0c35\u0c3e\u0c30\u0c40\u0c17\u0c3e
\u0c38\u0c2e\u0c3e\u0c27\u0c3e\u0c28\u0c02
:"
,
"ANSWER"
:
"
\u0c26\u0c36\u0c32\u0c35\u0c3e\u0c30\u0c40\u0c17\u0c3e
\u0c38\u0c2e\u0c3e\u0c27\u0c3e\u0c28\u0c02
:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
# "REGEX": "సమాధానం (\\-?[0-9\\.\\,]+)",
"REGEX"
:
"
\u0c38\u0c2e\u0c3e\u0c27\u0c3e\u0c28\u0c02
(
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"th"
:
{
# Thai
"th"
:
{
# Thai
# "QUESTION": "โจทย์:",
"QUESTION"
:
"
\u0e42\u0e08\u0e17\u0e22\u0e4c
:"
,
"QUESTION"
:
"
\u0e42\u0e08\u0e17\u0e22\u0e4c
:"
,
# "ANSWER": "คำตอบทีละขั้นตอน:",
"ANSWER"
:
"
\u0e04\u0e33\u0e15\u0e2d\u0e1a\u0e17\u0e35\u0e25\u0e30\u0e02\u0e31\u0e49\u0e19\u0e15\u0e2d\u0e19
:"
,
"ANSWER"
:
"
\u0e04\u0e33\u0e15\u0e2d\u0e1a\u0e17\u0e35\u0e25\u0e30\u0e02\u0e31\u0e49\u0e19\u0e15\u0e2d\u0e19
:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
# "REGEX": "คำตอบคือ (\\-?[0-9\\.\\,]+)",
"REGEX"
:
"
\u0e04\u0e33\u0e15\u0e2d\u0e1a\u0e04\u0e37\u0e2d
(
\\
-?[0-9
\\
.
\\
,]+)"
,
},
},
"ja"
:
{
# Japanese
"ja"
:
{
# Japanese
# "QUESTION": "問題:",
"QUESTION"
:
"
\u554f\u984c
:"
,
"QUESTION"
:
"
\u554f\u984c
:"
,
# "ANSWER": "ステップごとの答え:",
"ANSWER"
:
"
\u30b9\u30c6\u30c3\u30d7\u3054\u3068\u306e\u7b54\u3048
:"
,
"ANSWER"
:
"
\u30b9\u30c6\u30c3\u30d7\u3054\u3068\u306e\u7b54\u3048
:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
# "REGEX": "答えは(\\-?[0-9\\.\\,]+)です。",
"REGEX"
:
"
\u7b54\u3048\u306f
(
\\
-?[0-9
\\
.
\\
,]+)
\u3067\u3059\u3002
"
,
},
},
"zh"
:
{
# Chinese
"zh"
:
{
# Chinese
# "QUESTION": "问题:",
"QUESTION"
:
"
\u95ee\u9898
:"
,
"QUESTION"
:
"
\u95ee\u9898
:"
,
# "ANSWER": "逐步解答:",
"ANSWER"
:
"
\u9010\u6b65\u89e3\u7b54
:"
,
"ANSWER"
:
"
\u9010\u6b65\u89e3\u7b54
:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
# "REGEX": "答案是 (\\-?[0-9\\.\\,]+)。",
"REGEX"
:
"
\u7b54\u6848\u662f
(
\\
-?[0-9
\\
.
\\
,]+)
\u3002
"
,
},
},
}
}
...
@@ -80,8 +101,6 @@ def add_regex_pattern(regex_pattern):
...
@@ -80,8 +101,6 @@ def add_regex_pattern(regex_pattern):
"filter_list"
:
[
"filter_list"
:
[
{
{
"name"
:
"get-answer"
,
"name"
:
"get-answer"
,
},
],
"filter"
:
[
"filter"
:
[
{
{
"function"
:
"regex"
,
"function"
:
"regex"
,
...
@@ -91,6 +110,8 @@ def add_regex_pattern(regex_pattern):
...
@@ -91,6 +110,8 @@ def add_regex_pattern(regex_pattern):
"function"
:
"take_first"
,
"function"
:
"take_first"
,
},
},
],
],
},
],
}
}
...
@@ -107,6 +128,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -107,6 +128,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
QUESTION
=
LANGUAGES
[
lang
][
"QUESTION"
]
QUESTION
=
LANGUAGES
[
lang
][
"QUESTION"
]
yaml_template
=
"cot_yaml"
yaml_template
=
"cot_yaml"
filter_list
=
{}
if
mode
==
"direct"
:
if
mode
==
"direct"
:
ANSWER
=
LANGUAGES
[
lang
][
"DIRECT"
]
ANSWER
=
LANGUAGES
[
lang
][
"DIRECT"
]
REGEX
=
None
REGEX
=
None
...
@@ -116,13 +138,13 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -116,13 +138,13 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
ANSWER
=
LANGUAGES
[
lang
][
"ANSWER"
]
ANSWER
=
LANGUAGES
[
lang
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
task_name
=
f
"mgsm_
{
lang
}
_native-cot"
task_name
=
f
"mgsm_
{
lang
}
_native-cot"
filter_list
=
add_regex_pattern
(
REGEX
)
elif
mode
==
"en-cot"
:
elif
mode
==
"en-cot"
:
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
task_name
=
f
"mgsm_
{
lang
}
_en-cot"
task_name
=
f
"mgsm_
{
lang
}
_en-cot"
file_name
=
f
"
{
task_name
}
.yaml"
file_name
=
f
"
{
task_name
}
.yaml"
filter_list
=
add_regex_pattern
(
REGEX
)
with
open
(
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
...
@@ -147,6 +169,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -147,6 +169,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
},
},
f
,
f
,
allow_unicode
=
True
,
allow_unicode
=
True
,
width
=
float
(
"inf"
),
)
)
except
FileExistsError
:
except
FileExistsError
:
err
.
append
(
file_name
)
err
.
append
(
file_name
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment