Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b8d1cef9
Unverified
Commit
b8d1cef9
authored
Sep 12, 2023
by
Lintang Sutawika
Committed by
GitHub
Sep 12, 2023
Browse files
Merge pull request #845 from EleutherAI/fix-mgsm
[Refactor] Fix MGSM
parents
f052d059
5ebe28eb
Changes
23
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
72 additions
and
112 deletions
+72
-112
lm_eval/tasks/mgsm/en_cot/cot_yaml
lm_eval/tasks/mgsm/en_cot/cot_yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_bn_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_bn_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_de_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_de_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_es_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_es_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_fr_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_fr_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_ja_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_ja_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_ru_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_ru_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_sw_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_sw_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_te_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_te_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_th_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_th_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/en_cot/mgsm_zh_en-cot.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_zh_en-cot.yaml
+6
-0
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_bn.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_bn.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_de.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_de.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_es.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_es.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_fr.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_fr.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ja.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ja.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ru.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ru.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_sw.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_sw.yaml
+0
-14
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_te.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_te.yaml
+0
-14
No files found.
lm_eval/tasks/mgsm/
nativ
e_cot/cot_yaml
→
lm_eval/tasks/mgsm/e
n
_cot/cot_yaml
View file @
b8d1cef9
...
@@ -21,3 +21,9 @@ metric_list:
...
@@ -21,3 +21,9 @@ metric_list:
higher_is_better: true
higher_is_better: true
ignore_case: true
ignore_case: true
ignore_punctuation: true
ignore_punctuation: true
filter_list:
- name: "get-answer"
filter:
- function: "regex"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
lm_eval/tasks/mgsm/en_cot/mgsm_bn_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_bn_direct
lm_eval/tasks/mgsm/en_cot/mgsm_de_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Frage:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_de_direct
lm_eval/tasks/mgsm/
nativ
e_cot/mgsm_
cot_native_en
.yaml
→
lm_eval/tasks/mgsm/e
n
_cot/mgsm_
en_en-cot
.yaml
View file @
b8d1cef9
# Generated by utils.py
# Generated by utils.py
dataset_name
:
en
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_en_direct
task
:
mgsm_en_direct
lm_eval/tasks/mgsm/en_cot/mgsm_es_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Pregunta:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_es_direct
lm_eval/tasks/mgsm/en_cot/mgsm_fr_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question
:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_fr_direct
lm_eval/tasks/mgsm/en_cot/mgsm_ja_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"問題:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_ja_direct
lm_eval/tasks/mgsm/en_cot/mgsm_ru_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Задача:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_ru_direct
lm_eval/tasks/mgsm/en_cot/mgsm_sw_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Swali:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_sw_direct
lm_eval/tasks/mgsm/en_cot/mgsm_te_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_te_direct
lm_eval/tasks/mgsm/en_cot/mgsm_th_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"โจทย์:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_th_direct
lm_eval/tasks/mgsm/en_cot/mgsm_zh_en-cot.yaml
0 → 100644
View file @
b8d1cef9
# Generated by utils.py
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"问题:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_zh_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_bn.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[16+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nধাপে
ধাপে
উত্তর:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nধাপে
ধাপে
উত্তর:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_bn_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_de.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[28+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nSchritt-für-Schritt-Antwort:"}}{%
else
%}{{"Frage:
"+question+"\nSchritt-für-Schritt-Antwort:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_de_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_es.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[22+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta
paso
a
paso:"}}{%
else
%}{{"Pregunta:
"+question+"\nRespuesta
paso
a
paso:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_es_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_fr.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
étape
par
étape
:"}}{%
else
%}{{"Question
:
"+question+"\nRéponse
étape
par
étape
:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_fr_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ja.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[10+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nステップごとの答え:"}}{%
else
%}{{"問題:
"+question+"\nステップごとの答え:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_ja_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ru.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nПошаговоерешение:"}}{%
else
%}{{"Задача:
"+question+"\nПошаговоерешение:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_ru_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_sw.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[24+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
else
%}{{"Swali:
"+question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_sw_direct
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_te.yaml
deleted
100644 → 0
View file @
f052d059
# Generated by utils.py
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nదశలవారీగా
సమాధానం:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nదశలవారీగా
సమాధానం:"}}{%
endif
%}'
filter
:
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_te_direct
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment