Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
56a4e794
Unverified
Commit
56a4e794
authored
Jul 15, 2024
by
Lintang Sutawika
Committed by
GitHub
Jul 15, 2024
Browse files
formatting (#2104)
parent
9884ad6e
Changes
54
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
92 additions
and
72 deletions
+92
-72
lm_eval/api/metrics.py
lm_eval/api/metrics.py
+1
-1
lm_eval/api/task.py
lm_eval/api/task.py
+1
-1
lm_eval/filters/extraction.py
lm_eval/filters/extraction.py
+1
-1
lm_eval/tasks/afrimgsm/README.md
lm_eval/tasks/afrimgsm/README.md
+4
-4
lm_eval/tasks/afrimgsm/run.sh
lm_eval/tasks/afrimgsm/run.sh
+1
-1
lm_eval/tasks/afrimgsm/utils.py
lm_eval/tasks/afrimgsm/utils.py
+64
-42
lm_eval/tasks/afrimmlu/README.md
lm_eval/tasks/afrimmlu/README.md
+4
-4
lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
+6
-6
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
+0
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
+0
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
+1
-1
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
+1
-1
No files found.
lm_eval/api/metrics.py
View file @
56a4e794
lm_eval/api/task.py
View file @
56a4e794
lm_eval/filters/extraction.py
View file @
56a4e794
lm_eval/tasks/afrimgsm/README.md
View file @
56a4e794
lm_eval/tasks/afrimgsm/run.sh
View file @
56a4e794
lm_eval/tasks/afrimgsm/utils.py
View file @
56a4e794
...
@@ -2,51 +2,74 @@ import argparse
...
@@ -2,51 +2,74 @@ import argparse
import
yaml
import
yaml
languages
=
[
'eng'
,
'amh'
,
'ibo'
,
'fra'
,
'sna'
,
'lin'
,
'wol'
,
'ewe'
,
'lug'
,
'xho'
,
'kin'
,
'twi'
,
'zul'
,
'orm'
,
'yor'
,
'hau'
,
'sot'
,
'swa'
]
languages_REGEX
=
{
"eng"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
languages
=
[
"eng"
,
"amh"
,
"ibo"
,
"fra"
,
"sna"
,
"lin"
,
"wol"
,
"ewe"
,
"lug"
,
"xho"
,
"kin"
,
"twi"
,
"zul"
,
"orm"
,
"yor"
,
"hau"
,
"sot"
,
"swa"
,
]
languages_REGEX
=
{
"eng"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
,
"amh"
:
"መልሱ (
\\
-?[0-9
\\
.
\\
,]+)"
,
"amh"
:
"መልሱ (
\\
-?[0-9
\\
.
\\
,]+)"
,
"ibo"
:
"Azịza ya bụ (
\\
-?[0-9
\\
.
\\
,]+)"
,
"ibo"
:
"Azịza ya bụ (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
fra
'
:
"La réponse est(
\\
-?[0-9
\\
.
\\
,]+)"
,
"
fra
"
:
"La réponse est(
\\
-?[0-9
\\
.
\\
,]+)"
,
'
sna
'
:
"Mhinduro kumubvunzo ndi (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
sna
"
:
"Mhinduro kumubvunzo ndi (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
lin
'
:
"Eyano ezali (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
lin
"
:
"Eyano ezali (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
wol
'
:
"Tontu li (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
wol
"
:
"Tontu li (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
ewe
'
:
"ŋuɖoɖoae nye (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
ewe
"
:
"ŋuɖoɖoae nye (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
lug
'
:
"Ansa eri (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
lug
"
:
"Ansa eri (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
xho
'
:
"Impendulo ngu (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
xho
"
:
"Impendulo ngu (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
kin
'
:
"Igisubizo ni (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
kin
"
:
"Igisubizo ni (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
twi
'
:
"Ne nnyiano yɛ (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
twi
"
:
"Ne nnyiano yɛ (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
zul
'
:
"Impendulo ithi (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
zul
"
:
"Impendulo ithi (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
orm
'
:
"Deebiin isaa (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
orm
"
:
"Deebiin isaa (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
yor
'
:
"Ìdáhùn náà ni (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
yor
"
:
"Ìdáhùn náà ni (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
hau
'
:
"Amsar ita ce (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
hau
"
:
"Amsar ita ce (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
sot
'
:
"Karabo ke (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
sot
"
:
"Karabo ke (
\\
-?[0-9
\\
.
\\
,]+)"
,
'
swa
'
:
"Jibu ni (
\\
-?[0-9
\\
.
\\
,]+)"
,
"
swa
"
:
"Jibu ni (
\\
-?[0-9
\\
.
\\
,]+)"
,
}
}
LANGUAGES
=
{}
LANGUAGES
=
{}
for
lang
in
languages
:
for
lang
in
languages
:
if
lang
==
'
amh
'
:
if
lang
==
"
amh
"
:
LANGUAGES
[
lang
]
=
{
# English
LANGUAGES
[
lang
]
=
{
# English
"QUESTION"
:
"ጥያቄ:"
,
"QUESTION"
:
"ጥያቄ:"
,
"ANSWER"
:
"በቅደም ተከተል መልስ:"
,
"ANSWER"
:
"በቅደም ተከተል መልስ:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
languages_REGEX
[
lang
]}
"REGEX"
:
languages_REGEX
[
lang
],
elif
lang
==
'yor'
:
}
elif
lang
==
"yor"
:
LANGUAGES
[
lang
]
=
{
# English
LANGUAGES
[
lang
]
=
{
# English
"QUESTION"
:
"Ìbéèrè:"
,
"QUESTION"
:
"Ìbéèrè:"
,
"ANSWER"
:
"Ìdáhùn lẹ́sẹsẹ:"
,
"ANSWER"
:
"Ìdáhùn lẹ́sẹsẹ:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
languages_REGEX
[
lang
]}
"REGEX"
:
languages_REGEX
[
lang
],
}
else
:
else
:
LANGUAGES
[
lang
]
=
{
# English
LANGUAGES
[
lang
]
=
{
# English
"QUESTION"
:
"Question:"
,
"QUESTION"
:
"Question:"
,
"ANSWER"
:
"Step-by-Step Answer:"
,
"ANSWER"
:
"Step-by-Step Answer:"
,
"DIRECT"
:
"Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
languages_REGEX
[
lang
]}
"REGEX"
:
languages_REGEX
[
lang
],
}
def
add_regex_pattern
(
regex_pattern
):
def
add_regex_pattern
(
regex_pattern
):
...
@@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
err
=
[]
err
=
[]
for
lang
in
LANGUAGES
.
keys
():
for
lang
in
LANGUAGES
.
keys
():
try
:
try
:
yaml_template
=
"cot_yaml"
yaml_template
=
"cot_yaml"
filter_list
=
{}
filter_list
=
{}
DELIMITER
=
None
DELIMITER
=
None
if
mode
==
"direct"
:
if
mode
==
"direct"
:
ANSWER
=
LANGUAGES
[
'
eng
'
][
"DIRECT"
]
ANSWER
=
LANGUAGES
[
"
eng
"
][
"DIRECT"
]
QUESTION
=
LANGUAGES
[
'
eng
'
][
"QUESTION"
]
QUESTION
=
LANGUAGES
[
"
eng
"
][
"QUESTION"
]
REGEX
=
None
REGEX
=
None
task_name
=
f
"afrimgsm_direct_
{
lang
}
"
task_name
=
f
"afrimgsm_direct_
{
lang
}
"
yaml_template
=
"direct_yaml"
yaml_template
=
"direct_yaml"
...
@@ -122,8 +144,8 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -122,8 +144,8 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
QUESTION
=
LANGUAGES
[
"eng"
][
"QUESTION"
]
QUESTION
=
LANGUAGES
[
"eng"
][
"QUESTION"
]
task_name
=
f
"afrimgsm_en_cot_
{
lang
}
"
task_name
=
f
"afrimgsm_en_cot_
{
lang
}
"
elif
mode
==
"translate-direct"
:
elif
mode
==
"translate-direct"
:
ANSWER
=
LANGUAGES
[
'
eng
'
][
"DIRECT"
]
ANSWER
=
LANGUAGES
[
"
eng
"
][
"DIRECT"
]
QUESTION
=
LANGUAGES
[
'
eng
'
][
"QUESTION"
]
QUESTION
=
LANGUAGES
[
"
eng
"
][
"QUESTION"
]
REGEX
=
None
REGEX
=
None
task_name
=
f
"afrimgsm_translate_direct_
{
lang
}
"
task_name
=
f
"afrimgsm_translate_direct_
{
lang
}
"
yaml_template
=
"translate_direct_yaml"
yaml_template
=
"translate_direct_yaml"
...
...
lm_eval/tasks/afrimmlu/README.md
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
View file @
56a4e794
dataset_name
:
eng
dataset_name
:
eng
include
:
afrimmlu_common_yaml
include
:
afrimmlu_common_yaml
task
:
afrimmlu_direct_eng
task
:
afrimmlu_direct_eng
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
View file @
56a4e794
dataset_name
:
ewe
dataset_name
:
ewe
include
:
afrimmlu_common_yaml
include
:
afrimmlu_common_yaml
task
:
afrimmlu_direct_ewe
task
:
afrimmlu_direct_ewe
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
View file @
56a4e794
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
View file @
56a4e794
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment