Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
90ad5db7
Commit
90ad5db7
authored
Mar 01, 2024
by
lintangsutawika
Browse files
merged main
parents
f692caa9
b177c82c
Changes
484
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
164 additions
and
55 deletions
+164
-55
lm_eval/tasks/kmmlu/kmmlu_telecommunications_and_wireless_technology.yaml
...mlu/kmmlu_telecommunications_and_wireless_technology.yaml
+0
-3
lm_eval/tasks/kobest/utils.py
lm_eval/tasks/kobest/utils.py
+13
-2
lm_eval/tasks/medmcqa/utils_medmcqa.py
lm_eval/tasks/medmcqa/utils_medmcqa.py
+6
-1
lm_eval/tasks/medqa/preprocess_medqa.py
lm_eval/tasks/medqa/preprocess_medqa.py
+6
-1
lm_eval/tasks/mgsm/direct/direct_yaml
lm_eval/tasks/mgsm/direct/direct_yaml
+7
-1
lm_eval/tasks/mgsm/direct/mgsm_direct_bn.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_bn.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_de.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_de.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_en.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_en.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_es.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_es.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_fr.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_fr.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_ru.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_ru.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_sw.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_sw.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_te.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_te.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_th.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_th.yaml
+8
-4
lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml
+8
-4
lm_eval/tasks/mgsm/en_cot/cot_yaml
lm_eval/tasks/mgsm/en_cot/cot_yaml
+8
-3
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_bn.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_bn.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_de.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_de.yaml
+12
-0
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_en.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_en.yaml
+12
-0
No files found.
lm_eval/tasks/kmmlu/kmmlu_telecommunications_and_wireless_technology.yaml
deleted
100644 → 0
View file @
f692caa9
"
dataset_name"
:
"
Telecommunications-and-Wireless-Technology"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_telecommunications_and_wireless_technology"
lm_eval/tasks/kobest/utils.py
View file @
90ad5db7
...
@@ -6,32 +6,43 @@ def copa_doc_to_text(doc: dict) -> str:
...
@@ -6,32 +6,43 @@ def copa_doc_to_text(doc: dict) -> str:
connector
=
{
"원인"
:
" 왜냐하면"
,
"결과"
:
" 그래서"
}[
doc
[
"question"
].
strip
()]
connector
=
{
"원인"
:
" 왜냐하면"
,
"결과"
:
" 그래서"
}[
doc
[
"question"
].
strip
()]
return
f
"""
{
doc
[
"premise"
]
}
{
connector
}
"""
return
f
"""
{
doc
[
"premise"
]
}
{
connector
}
"""
def
copa_doc_to_target
(
doc
:
dict
)
->
str
:
def
copa_doc_to_target
(
doc
:
dict
)
->
str
:
correct_choice
=
doc
[
"alternative_1"
]
if
doc
[
"label"
]
==
0
else
doc
[
"alternative_2"
]
correct_choice
=
doc
[
"alternative_1"
]
if
doc
[
"label"
]
==
0
else
doc
[
"alternative_2"
]
return
f
"""
{
correct_choice
}
"""
return
f
"""
{
correct_choice
}
"""
def
copa_doc_to_choice
(
doc
:
dict
)
->
list
:
def
copa_doc_to_choice
(
doc
:
dict
)
->
list
:
return
[
f
"""
{
doc
[
"alternative_1"
]
}
"""
,
f
"""
{
doc
[
"alternative_2"
]
}
"""
]
return
[
f
"""
{
doc
[
"alternative_1"
]
}
"""
,
f
"""
{
doc
[
"alternative_2"
]
}
"""
]
def
sentineg_doc_to_text
(
doc
:
dict
):
def
sentineg_doc_to_text
(
doc
:
dict
):
return
f
"""문장:
{
doc
[
"sentence"
]
}
긍부정:"""
return
f
"""문장:
{
doc
[
"sentence"
]
}
긍부정:"""
def
wic_doc_to_text
(
doc
:
dict
)
->
str
:
def
wic_doc_to_text
(
doc
:
dict
)
->
str
:
return
f
"""문장1:
{
doc
[
"context_1"
]
}
문장2:
{
doc
[
"context_2"
]
}
두 문장에서
{
doc
[
"word"
]
}
가 같은 뜻으로 쓰였나?"""
return
f
"""문장1:
{
doc
[
"context_1"
]
}
문장2:
{
doc
[
"context_2"
]
}
두 문장에서
{
doc
[
"word"
]
}
가 같은 뜻으로 쓰였나?"""
def
hellaswag_process_doc
(
doc
:
Dataset
)
->
Dataset
:
def
hellaswag_process_doc
(
doc
:
Dataset
)
->
Dataset
:
def
preprocessor
(
dataset
):
def
preprocessor
(
dataset
):
return
{
return
{
"query"
:
f
"""문장:
{
dataset
[
"context"
]
}
"""
,
"query"
:
f
"""문장:
{
dataset
[
"context"
]
}
"""
,
"choices"
:
[
dataset
[
"ending_1"
],
dataset
[
"ending_2"
],
dataset
[
"ending_3"
],
dataset
[
"ending_4"
]],
"choices"
:
[
dataset
[
"ending_1"
],
dataset
[
"ending_2"
],
dataset
[
"ending_3"
],
dataset
[
"ending_4"
],
],
"gold"
:
int
(
dataset
[
"label"
]),
"gold"
:
int
(
dataset
[
"label"
]),
}
}
return
doc
.
map
(
preprocessor
)
return
doc
.
map
(
preprocessor
)
def
macro_f1_score
(
items
):
def
macro_f1_score
(
items
):
unzipped_list
=
list
(
zip
(
*
items
))
unzipped_list
=
list
(
zip
(
*
items
))
golds
=
unzipped_list
[
0
]
golds
=
unzipped_list
[
0
]
preds
=
unzipped_list
[
1
]
preds
=
unzipped_list
[
1
]
fscore
=
f1_score
(
golds
,
preds
,
average
=
'
macro
'
)
fscore
=
f1_score
(
golds
,
preds
,
average
=
"
macro
"
)
return
fscore
return
fscore
lm_eval/tasks/medmcqa/utils_medmcqa.py
View file @
90ad5db7
...
@@ -10,7 +10,12 @@ def doc_to_text(doc) -> str:
...
@@ -10,7 +10,12 @@ def doc_to_text(doc) -> str:
Answer:
Answer:
"""
"""
choices
=
[
doc
[
"opa"
],
doc
[
"opb"
],
doc
[
"opc"
],
doc
[
"opd"
]]
choices
=
[
doc
[
"opa"
],
doc
[
"opb"
],
doc
[
"opc"
],
doc
[
"opd"
]]
option_choices
=
{
'A'
:
choices
[
0
],
'B'
:
choices
[
1
],
'C'
:
choices
[
2
],
'D'
:
choices
[
3
]}
option_choices
=
{
"A"
:
choices
[
0
],
"B"
:
choices
[
1
],
"C"
:
choices
[
2
],
"D"
:
choices
[
3
],
}
prompt
=
"Question: "
+
doc
[
"question"
]
+
"
\n
Choices:
\n
"
prompt
=
"Question: "
+
doc
[
"question"
]
+
"
\n
Choices:
\n
"
for
choice
,
option
in
option_choices
.
items
():
for
choice
,
option
in
option_choices
.
items
():
...
...
lm_eval/tasks/medqa/preprocess_medqa.py
View file @
90ad5db7
def
doc_to_text
(
doc
)
->
str
:
def
doc_to_text
(
doc
)
->
str
:
option_choices
=
{
'A'
:
doc
[
"ending0"
],
'B'
:
doc
[
"ending1"
],
'C'
:
doc
[
"ending2"
],
'D'
:
doc
[
"ending3"
]}
option_choices
=
{
"A"
:
doc
[
"ending0"
],
"B"
:
doc
[
"ending1"
],
"C"
:
doc
[
"ending2"
],
"D"
:
doc
[
"ending3"
],
}
answers
=
""
.
join
((
f
"
{
k
}
.
{
v
}
\n
"
)
for
k
,
v
in
option_choices
.
items
())
answers
=
""
.
join
((
f
"
{
k
}
.
{
v
}
\n
"
)
for
k
,
v
in
option_choices
.
items
())
return
f
"Question:
{
doc
[
'sent1'
]
}
\n
{
answers
}
Answer:"
return
f
"Question:
{
doc
[
'sent1'
]
}
\n
{
answers
}
Answer:"
...
...
lm_eval/tasks/mgsm/direct/direct_yaml
View file @
90ad5db7
...
@@ -19,6 +19,12 @@ filter_list:
...
@@ -19,6 +19,12 @@ filter_list:
filter:
filter:
- function: remove_whitespace
- function: remove_whitespace
- function: take_first
- function: take_first
- filter:
- function: regex
group_select: -1
regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metric_list:
metric_list:
- metric: exact_match
- metric: exact_match
aggregation: mean
aggregation: mean
...
@@ -26,4 +32,4 @@ metric_list:
...
@@ -26,4 +32,4 @@ metric_list:
ignore_case: true
ignore_case: true
ignore_punctuation: true
ignore_punctuation: true
metadata:
metadata:
version:
1
.0
version:
2
.0
lm_eval/tasks/mgsm/direct/mgsm_direct_bn.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
bn
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[17:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"প্রশ্ন:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
প্রশ্ন:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_bn
task
:
mgsm_direct_bn
lm_eval/tasks/mgsm/direct/mgsm_direct_de.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
de
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[7+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[29:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAntwort:"}}{%
else
%}{{"Frage:
"+question+"\nAntwort:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAntwort"}}{%
else
%}{{"Frage:
generation_kwargs
:
"+question+"\nAntwort"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
Frage:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_de
task
:
mgsm_direct_de
lm_eval/tasks/mgsm/direct/mgsm_direct_en.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
en
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[21:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"Question:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Question:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
Question:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_en
task
:
mgsm_direct_en
lm_eval/tasks/mgsm/direct/mgsm_direct_es.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
es
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[23:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta:"}}{%
else
%}{{"Pregunta:
"+question+"\nRespuesta:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Pregunta:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
Pregunta:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_es
task
:
mgsm_direct_es
lm_eval/tasks/mgsm/direct/mgsm_direct_fr.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
fr
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[26:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
:"}}{%
else
%}{{"Question
:
"+question+"\nRéponse
:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Question
generation_kwargs
:
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
Question
:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_fr
task
:
mgsm_direct_fr
lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
ja
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[11:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"問題:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"問題:
"+question+"\nAnswer"}}{%
generation_kwargs
:
endif
%}'
do_sample
:
false
until
:
-
'
問題:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_ja
task
:
mgsm_direct_ja
lm_eval/tasks/mgsm/direct/mgsm_direct_ru.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
ru
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"Задача:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Задача:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
Задача:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_ru
task
:
mgsm_direct_ru
lm_eval/tasks/mgsm/direct/mgsm_direct_sw.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
sw
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[25:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"Swali:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Swali:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
Swali:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_sw
task
:
mgsm_direct_sw
lm_eval/tasks/mgsm/direct/mgsm_direct_te.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
te
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[19:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"ప్రశ్న:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
ప్రశ్న:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_te
task
:
mgsm_direct_te
lm_eval/tasks/mgsm/direct/mgsm_direct_th.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
th
dataset_name
:
th
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[18:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"โจทย์:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"โจทย์:
generation_kwargs
:
"+question+"\nAnswer"}}{%
endif
%}'
do_sample
:
false
until
:
-
'
โจทย์:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_th
task
:
mgsm_direct_th
lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
zh
dataset_name
:
zh
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6:]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer:"}}{%
else
%}{{"问题:
"+question+"\nAnswer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"问题:
"+question+"\nAnswer"}}{%
generation_kwargs
:
endif
%}'
do_sample
:
false
until
:
-
'
问题:'
-
</s>
-
<|im_end|>
include
:
direct_yaml
include
:
direct_yaml
task
:
mgsm_direct_zh
task
:
mgsm_direct_zh
lm_eval/tasks/mgsm/en_cot/cot_yaml
View file @
90ad5db7
...
@@ -7,7 +7,6 @@ dataset_name: null # Overridden by language-specific config.
...
@@ -7,7 +7,6 @@ dataset_name: null # Overridden by language-specific config.
output_type: generate_until
output_type: generate_until
training_split: train
training_split: train
test_split: test
test_split: test
target_delimiter: ""
generation_kwargs:
generation_kwargs:
until:
until:
- "\n\n"
- "\n\n"
...
@@ -22,10 +21,16 @@ metric_list:
...
@@ -22,10 +21,16 @@ metric_list:
ignore_case: true
ignore_case: true
ignore_punctuation: true
ignore_punctuation: true
filter_list:
filter_list:
- name: "
get-answer
"
- name: "
strict-match
"
filter:
filter:
- function: "regex"
- function: "regex"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
- function: "take_first"
- filter:
- function: regex
group_select: -1
regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metadata:
metadata:
version:
1
.0
version:
2
.0
lm_eval/tasks/mgsm/en_cot/mgsm_
bn_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_bn
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
bn
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
20+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[
17:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
প্রশ্ন:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
b
n_
direct
task
:
mgsm_
e
n_
cot_bn
lm_eval/tasks/mgsm/en_cot/mgsm_
de_
en
-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en
_
cot
_de
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
de
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
0+1
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
9:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Frage:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Frage:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Frage:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_
de_direct
task
:
mgsm_
en_cot_de
lm_eval/tasks/mgsm/en_cot/mgsm_en_
en-
cot.yaml
→
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot
_en
.yaml
View file @
90ad5db7
# Generated by utils.py
# Generated by utils.py
dataset_name
:
en
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[2
0+
1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[21
:
]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
generation_kwargs
:
do_sample
:
false
until
:
-
'
Question:'
-
</s>
-
<|im_end|>
include
:
cot_yaml
include
:
cot_yaml
task
:
mgsm_en_
direct
task
:
mgsm_en_
cot_en
Prev
1
…
14
15
16
17
18
19
20
21
22
…
25
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment