Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
90ad5db7
Commit
90ad5db7
authored
Mar 01, 2024
by
lintangsutawika
Browse files
merged main
parents
f692caa9
b177c82c
Changes
484
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
103 additions
and
82 deletions
+103
-82
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_es.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_es.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_fr.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_fr.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ru.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ru.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml
+12
-0
lm_eval/tasks/mgsm/gen_yaml.sh
lm_eval/tasks/mgsm/gen_yaml.sh
+5
-0
lm_eval/tasks/mgsm/native_cot/cot_yaml
lm_eval/tasks/mgsm/native_cot/cot_yaml
+2
-2
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_bn.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_bn.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_de.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_de.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_en.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_en.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_es.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_es.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_fr.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_fr.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ja.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ja.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ru.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ru.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_sw.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_sw.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_te.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_te.yaml
+0
-8
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_th.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_th.yaml
+0
-8
No files found.
lm_eval/tasks/mgsm/en_cot/mgsm_e
s_en-cot
.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_e
n_cot_es
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
es
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
0+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
3:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Pregunta:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Pregunta:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Pregunta:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_e
s_direct
task
:
mgsm_e
n_cot_es
lm_eval/tasks/mgsm/en_cot/mgsm_
fr_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_fr
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
fr
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
0+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
6:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question
:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question
:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question
:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
fr_direct
task
:
mgsm_
en_cot_fr
lm_eval/tasks/mgsm/en_cot/mgsm_
ja_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_ja
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
ja
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
20+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
11:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"問題:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"問題:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
問題:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
ja_direct
task
:
mgsm_
en_cot_ja
lm_eval/tasks/mgsm/en_cot/mgsm_
ru_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_ru
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
ru
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
20+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
18:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Задача:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Задача:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Задача:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
ru_direct
task
:
mgsm_
en_cot_ru
lm_eval/tasks/mgsm/en_cot/mgsm_
sw_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_sw
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
sw
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
0+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
5:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Swali:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Swali:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Swali:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
sw_direct
task
:
mgsm_
en_cot_sw
lm_eval/tasks/mgsm/en_cot/mgsm_
te_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_te
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
te
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
20+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
19:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
ప్రశ్న:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
te_direct
task
:
mgsm_
en_cot_te
lm_eval/tasks/mgsm/en_cot/mgsm_
th_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_th
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
th
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
20+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
18:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"โจทย์:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"โจทย์:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
โจทย์:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
th_direct
task
:
mgsm_
en_cot_th
lm_eval/tasks/mgsm/en_cot/mgsm_
zh_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_zh
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
zh
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
20+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
6:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"问题:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"问题:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
问题:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
zh_direct
task
:
mgsm_
en_cot_zh
lm_eval/tasks/mgsm/gen_yaml.sh
0 → 100755
View file @
90ad5db7
#!/bin/bash
python utils.py
--overwrite
--output-dir
direct
--mode
direct
python utils.py
--overwrite
--output-dir
en_cot
--mode
en-cot
python utils.py
--overwrite
--output-dir
native_cot
--mode
native-cot
lm_eval/tasks/mgsm/native_cot/cot_yaml
View file @
90ad5db7
...
@@ -7,7 +7,7 @@ dataset_name: null # Overridden by language-specific config.
...
@@ -7,7 +7,7 @@ dataset_name: null # Overridden by language-specific config.
output_type: generate_until
output_type: generate_until
training_split: train
training_split: train
test_split: test
test_split: test
target_delimiter: ""
#
target_delimiter: ""
generation_kwargs:
generation_kwargs:
until:
until:
- "\n\n"
- "\n\n"
...
@@ -28,4 +28,4 @@ filter_list:
...
@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
- function: "take_first"
metadata:
metadata:
version:
2
.0
version:
3
.0
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_bn.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[16+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nধাপে
ধাপে
উত্তর:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nধাপে
ধাপে
উত্তর:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_bn_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_de.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[28+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nSchritt-für-Schritt-Antwort:"}}{%
else
%}{{"Frage:
"+question+"\nSchritt-für-Schritt-Antwort:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_de_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_en.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[20+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_en_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_es.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[22+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta
paso
a
paso:"}}{%
else
%}{{"Pregunta:
"+question+"\nRespuesta
paso
a
paso:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_es_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_fr.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
étape
par
étape
:"}}{%
else
%}{{"Question
:
"+question+"\nRéponse
étape
par
étape
:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_fr_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ja.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[10+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nステップごとの答え:"}}{%
else
%}{{"問題:
"+question+"\nステップごとの答え:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_ja_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_ru.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nПошаговоерешение:"}}{%
else
%}{{"Задача:
"+question+"\nПошаговоерешение:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_ru_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_sw.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[24+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
else
%}{{"Swali:
"+question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_sw_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_te.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nదశలవారీగా
సమాధానం:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nదశలవారీగా
సమాధానం:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_te_native_cot
lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_th.yaml
deleted
100644 → 0
View file @
f692caa9
# Generated by utils.py
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nคำตอบทีละขั้นตอน:"}}{%
else
%}{{"โจทย์:
"+question+"\nคำตอบทีละขั้นตอน:"}}{%
endif
%}'
include
:
cot_yaml
task
:
mgsm_th_native_cot
Prev
1
…
15
16
17
18
19
20
21
22
23
…
25
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment