Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
83f95961
Commit
83f95961
authored
Aug 16, 2023
by
lintangsutawika
Browse files
add task variants
parent
d99e6cf4
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
167 additions
and
19 deletions
+167
-19
lm_eval/tasks/mgsm/cot_yaml
lm_eval/tasks/mgsm/cot_yaml
+23
-0
lm_eval/tasks/mgsm/direct_yaml
lm_eval/tasks/mgsm/direct_yaml
+0
-1
lm_eval/tasks/mgsm/mgsm_bn_direct.yaml
lm_eval/tasks/mgsm/mgsm_bn_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_bn_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_bn_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_de_direct.yaml
lm_eval/tasks/mgsm/mgsm_de_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_de_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_de_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_en_direct.yaml
lm_eval/tasks/mgsm/mgsm_en_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_en_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_en_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_es_direct.yaml
lm_eval/tasks/mgsm/mgsm_es_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_es_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_es_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_fr_direct.yaml
lm_eval/tasks/mgsm/mgsm_fr_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_fr_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_fr_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_ja_direct.yaml
lm_eval/tasks/mgsm/mgsm_ja_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_ja_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_ja_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_ru_direct.yaml
lm_eval/tasks/mgsm/mgsm_ru_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_ru_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_ru_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_sw_direct.yaml
lm_eval/tasks/mgsm/mgsm_sw_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_sw_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_sw_native-cot.yaml
+8
-2
lm_eval/tasks/mgsm/mgsm_te_direct.yaml
lm_eval/tasks/mgsm/mgsm_te_direct.yaml
+8
-0
lm_eval/tasks/mgsm/mgsm_te_native-cot.yaml
lm_eval/tasks/mgsm/mgsm_te_native-cot.yaml
+8
-2
No files found.
lm_eval/tasks/mgsm/cot_yaml
0 → 100644
View file @
83f95961
# Shared base config for the MGSM chain-of-thought task variants.
# This file will be included (via `include: cot_yaml`) in the generated
# language-specific task configs. It doesn't have a yaml file extension
# as it is not meant to be imported directly by the harness.
group: mgsm
dataset_path: juletxara/mgsm
dataset_name: null  # Overridden by language-specific config.
output_type: greedy_until
training_split: train
test_split: test
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "\n\n"
    - "\n"
  do_sample: false
  temperature: 0.0
# NOTE(review): this key used to appear twice in this mapping, first as ""
# and then as " ". Duplicate mapping keys are not valid YAML; loaders that
# tolerate them keep the last occurrence, so " " is retained here as the
# single definition. Confirm that " " (and not "") is the intended delimiter.
target_delimiter: " "
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
lm_eval/tasks/mgsm/
common_template
_yaml
→
lm_eval/tasks/mgsm/
direct
_yaml
View file @
83f95961
...
@@ -19,7 +19,6 @@ filter_list:
...
@@ -19,7 +19,6 @@ filter_list:
filter:
filter:
- function: remove_whitespace
- function: remove_whitespace
- function: take_first
- function: take_first
target_delimiter: " "
metric_list:
metric_list:
- metric: exact_match
- metric: exact_match
aggregation: mean
aggregation: mean
...
...
lm_eval/tasks/mgsm/mgsm_bn_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
bn
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_bn_direct
lm_eval/tasks/mgsm/mgsm_bn.yaml
→
lm_eval/tasks/mgsm/mgsm_bn
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[16+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[16+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nধাপে
ধাপে
উত্তর:"}}{%
else
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nধাপে
ধাপে
উত্তর:"}}{%
else
%}{{"প্রশ্ন:
"+question+"\nধাপে
ধাপে
উত্তর:"}}{%
endif
%}'
%}{{"প্রশ্ন:
"+question+"\nধাপে
ধাপে
উত্তর:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_bn
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_bn_direct
lm_eval/tasks/mgsm/mgsm_de_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
de
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[7+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAntwort"}}{%
else
%}{{"Frage:
"+question+"\nAntwort"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_de_direct
lm_eval/tasks/mgsm/mgsm_de.yaml
→
lm_eval/tasks/mgsm/mgsm_de
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[28+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[28+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nSchritt-für-Schritt-Antwort:"}}{%
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nSchritt-für-Schritt-Antwort:"}}{%
else
%}{{"Frage:
"+question+"\nSchritt-für-Schritt-Antwort:"}}{%
endif
%}'
else
%}{{"Frage:
"+question+"\nSchritt-für-Schritt-Antwort:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_de
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_de_direct
lm_eval/tasks/mgsm/mgsm_en_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
en
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Question:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_en_direct
lm_eval/tasks/mgsm/mgsm_en.yaml
→
lm_eval/tasks/mgsm/mgsm_en
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[20+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[20+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nStep-by-Step
Answer:"}}{%
else
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
%}{{"Question:
"+question+"\nStep-by-Step
Answer:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_en
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_en_direct
lm_eval/tasks/mgsm/mgsm_es_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
es
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Pregunta:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_es_direct
lm_eval/tasks/mgsm/mgsm_es.yaml
→
lm_eval/tasks/mgsm/mgsm_es
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[22+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[22+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta
paso
a
paso:"}}{%
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRespuesta
paso
a
paso:"}}{%
else
%}{{"Pregunta:
"+question+"\nRespuesta
paso
a
paso:"}}{%
endif
%}'
else
%}{{"Pregunta:
"+question+"\nRespuesta
paso
a
paso:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_es
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_es_direct
lm_eval/tasks/mgsm/mgsm_fr_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
fr
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Question
:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_fr_direct
lm_eval/tasks/mgsm/mgsm_fr.yaml
→
lm_eval/tasks/mgsm/mgsm_fr
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[25+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[25+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
étape
par
étape
:"}}{%
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nRéponse
étape
par
étape
:"}}{%
else
%}{{"Question
:
"+question+"\nRéponse
étape
par
étape
:"}}{%
endif
%}'
else
%}{{"Question
:
"+question+"\nRéponse
étape
par
étape
:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_fr
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_fr_direct
lm_eval/tasks/mgsm/mgsm_ja_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
ja
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"問題:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_ja_direct
lm_eval/tasks/mgsm/mgsm_ja.yaml
→
lm_eval/tasks/mgsm/mgsm_ja
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[10+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[10+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nステップごとの答え:"}}{%
else
%}{{"問題:
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nステップごとの答え:"}}{%
else
%}{{"問題:
"+question+"\nステップごとの答え:"}}{%
endif
%}'
"+question+"\nステップごとの答え:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_ja
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_ja_direct
lm_eval/tasks/mgsm/mgsm_ru_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
ru
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Задача:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_ru_direct
lm_eval/tasks/mgsm/mgsm_ru.yaml
→
lm_eval/tasks/mgsm/mgsm_ru
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[17+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[17+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nПошаговоерешение:"}}{%
else
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nПошаговоерешение:"}}{%
else
%}{{"Задача:
"+question+"\nПошаговоерешение:"}}{%
endif
%}'
%}{{"Задача:
"+question+"\nПошаговоерешение:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_ru
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_ru_direct
lm_eval/tasks/mgsm/mgsm_sw_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
sw
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"Swali:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_sw_direct
lm_eval/tasks/mgsm/mgsm_sw.yaml
→
lm_eval/tasks/mgsm/mgsm_sw
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[24+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[24+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
else
%}{{"Swali:
"+question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
endif
%}'
else
%}{{"Swali:
"+question+"\nJibu
la
Hatua
kwa
Hatua:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_sw
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_sw_direct
lm_eval/tasks/mgsm/mgsm_te_direct.yaml
0 → 100644
View file @
83f95961
# Generated by utils.py
dataset_name
:
te
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[6+1]}}{%
else
%}{{answer_number|string}}{%
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nAnswer"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nAnswer"}}{%
endif
%}'
include
:
direct_yaml
task
:
mgsm_te_direct
lm_eval/tasks/mgsm/mgsm_te.yaml
→
lm_eval/tasks/mgsm/mgsm_te
_native-cot
.yaml
View file @
83f95961
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[18+1]}}{% else %}{{answer_nu
...
@@ -4,5 +4,11 @@ doc_to_target: '{% if answer is not none %}{{answer[18+1]}}{% else %}{{answer_nu
endif
%}'
endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nదశలవారీగా
సమాధానం:"}}{%
else
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nదశలవారీగా
సమాధానం:"}}{%
else
%}{{"ప్రశ్న:
"+question+"\nదశలవారీగా
సమాధానం:"}}{%
endif
%}'
%}{{"ప్రశ్న:
"+question+"\nదశలవారీగా
సమాధానం:"}}{%
endif
%}'
include
:
common_template_yaml
filter
:
task
:
mgsm_te
-
function
:
regex
regex_pattern
:
The answer is (\-?[0-9\.\,]+)
-
function
:
take_first
filter_list
:
-
name
:
get-answer
include
:
cot_yaml
task
:
mgsm_te_direct
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment