Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
25869601
Commit
25869601
authored
Oct 19, 2024
by
Baber
Browse files
Merge branch 'main' into mathvista
# Conflicts: # lm_eval/models/hf_vlms.py
parents
56f40c53
c1d8795d
Changes
253
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
530 additions
and
0 deletions
+530
-0
lm_eval/tasks/basque_bench/flores_eu/_flores_common_yaml
lm_eval/tasks/basque_bench/flores_eu/_flores_common_yaml
+27
-0
lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py
...al/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py
+333
-0
lm_eval/tasks/basque_bench/flores_eu/flores_ca-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_ca-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_de-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_de-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_en-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_en-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_es-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_es-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-ca.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-ca.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-de.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-de.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-en.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-en.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-es.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-es.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-fr.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-fr.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-gl.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-gl.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-it.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-it.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu-pt.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu-pt.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_eu.yaml
+24
-0
lm_eval/tasks/basque_bench/flores_eu/flores_fr-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_fr-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_gl-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_gl-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_it-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_it-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/flores_eu/flores_pt-eu.yaml
lm_eval/tasks/basque_bench/flores_eu/flores_pt-eu.yaml
+7
-0
lm_eval/tasks/basque_bench/mgsm_cot_native_eu.yaml
lm_eval/tasks/basque_bench/mgsm_cot_native_eu.yaml
+34
-0
No files found.
lm_eval/tasks/basque_bench/flores_eu/_flores_common_yaml
0 → 100644
View file @
25869601
tag: flores
dataset_path: facebook/flores
dataset_name: all
output_type: generate_until
#! The test split of flores is not publicly available! (See paper section 6.1)
training_split: dev
validation_split: dev
test_split: devtest
fewshot_split: dev
target_delimiter: ''
generation_kwargs:
until:
- "\n"
metric_list:
- metric: bleu
aggregation: bleu
higher_is_better: true
- metric: ter
aggregation: ter
higher_is_better: false
- metric: chrf
aggregation: chrf
higher_is_better: true
metadata:
version: 0.1
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py
0 → 100644
View file @
25869601
# ruff: noqa: E731, E741
"""
Script to generate task YAMLs for the FLORES-200 dataset.
Based on `tasks/translation/utils.py`.
"""
import
argparse
import
itertools
import
yaml
from
langcodes
import
Language
# utils
def flatten(l):
    """Return a single list holding the items of every iterable in *l*.

    Exactly one level of nesting is removed; inner items are not flattened
    any further.  ``chain.from_iterable`` consumes the outer iterable
    directly instead of unpacking it into call arguments.
    """
    return list(itertools.chain.from_iterable(l))
# constants
_LANGUAGES
=
[
"ace_Arab"
,
"bam_Latn"
,
"dzo_Tibt"
,
"hin_Deva"
,
"khm_Khmr"
,
"mag_Deva"
,
"pap_Latn"
,
"sot_Latn"
,
"tur_Latn"
,
"ace_Latn"
,
"ban_Latn"
,
"ell_Grek"
,
"hne_Deva"
,
"kik_Latn"
,
"mai_Deva"
,
"pbt_Arab"
,
"spa_Latn"
,
"twi_Latn"
,
"acm_Arab"
,
"bel_Cyrl"
,
"eng_Latn"
,
"hrv_Latn"
,
"kin_Latn"
,
"mal_Mlym"
,
"pes_Arab"
,
"srd_Latn"
,
"tzm_Tfng"
,
"acq_Arab"
,
"bem_Latn"
,
"epo_Latn"
,
"hun_Latn"
,
"kir_Cyrl"
,
"mar_Deva"
,
"plt_Latn"
,
"srp_Cyrl"
,
"uig_Arab"
,
"aeb_Arab"
,
"ben_Beng"
,
"est_Latn"
,
"hye_Armn"
,
"kmb_Latn"
,
"min_Arab"
,
"pol_Latn"
,
"ssw_Latn"
,
"ukr_Cyrl"
,
"afr_Latn"
,
"bho_Deva"
,
"eus_Latn"
,
"ibo_Latn"
,
"kmr_Latn"
,
"min_Latn"
,
"por_Latn"
,
"sun_Latn"
,
"umb_Latn"
,
"ajp_Arab"
,
"bjn_Arab"
,
"ewe_Latn"
,
"ilo_Latn"
,
"knc_Arab"
,
"mkd_Cyrl"
,
"prs_Arab"
,
"swe_Latn"
,
"urd_Arab"
,
"aka_Latn"
,
"bjn_Latn"
,
"fao_Latn"
,
"ind_Latn"
,
"knc_Latn"
,
"mlt_Latn"
,
"quy_Latn"
,
"swh_Latn"
,
"uzn_Latn"
,
"als_Latn"
,
"bod_Tibt"
,
"fij_Latn"
,
"isl_Latn"
,
"kon_Latn"
,
"mni_Beng"
,
"ron_Latn"
,
"szl_Latn"
,
"vec_Latn"
,
"amh_Ethi"
,
"bos_Latn"
,
"fin_Latn"
,
"ita_Latn"
,
"kor_Hang"
,
"mos_Latn"
,
"run_Latn"
,
"tam_Taml"
,
"vie_Latn"
,
"apc_Arab"
,
"bug_Latn"
,
"fon_Latn"
,
"jav_Latn"
,
"lao_Laoo"
,
"mri_Latn"
,
"rus_Cyrl"
,
"taq_Latn"
,
"war_Latn"
,
"arb_Arab"
,
"bul_Cyrl"
,
"fra_Latn"
,
"jpn_Jpan"
,
"lij_Latn"
,
"mya_Mymr"
,
"sag_Latn"
,
"taq_Tfng"
,
"wol_Latn"
,
"arb_Latn"
,
"cat_Latn"
,
"fur_Latn"
,
"kab_Latn"
,
"lim_Latn"
,
"nld_Latn"
,
"san_Deva"
,
"tat_Cyrl"
,
"xho_Latn"
,
"ars_Arab"
,
"ceb_Latn"
,
"fuv_Latn"
,
"kac_Latn"
,
"lin_Latn"
,
"nno_Latn"
,
"sat_Olck"
,
"tel_Telu"
,
"ydd_Hebr"
,
"ary_Arab"
,
"ces_Latn"
,
"gaz_Latn"
,
"kam_Latn"
,
"lit_Latn"
,
"nob_Latn"
,
"scn_Latn"
,
"tgk_Cyrl"
,
"yor_Latn"
,
"arz_Arab"
,
"cjk_Latn"
,
"gla_Latn"
,
"kan_Knda"
,
"lmo_Latn"
,
"npi_Deva"
,
"shn_Mymr"
,
"tgl_Latn"
,
"yue_Hant"
,
"asm_Beng"
,
"ckb_Arab"
,
"gle_Latn"
,
"kas_Arab"
,
"ltg_Latn"
,
"nso_Latn"
,
"sin_Sinh"
,
"tha_Thai"
,
"zho_Hans"
,
"ast_Latn"
,
"crh_Latn"
,
"glg_Latn"
,
"kas_Deva"
,
"ltz_Latn"
,
"nus_Latn"
,
"slk_Latn"
,
"tir_Ethi"
,
"zho_Hant"
,
"awa_Deva"
,
"cym_Latn"
,
"grn_Latn"
,
"kat_Geor"
,
"lua_Latn"
,
"nya_Latn"
,
"slv_Latn"
,
"tpi_Latn"
,
"zsm_Latn"
,
"ayr_Latn"
,
"dan_Latn"
,
"guj_Gujr"
,
"kaz_Cyrl"
,
"lug_Latn"
,
"oci_Latn"
,
"smo_Latn"
,
"tsn_Latn"
,
"zul_Latn"
,
"azb_Arab"
,
"deu_Latn"
,
"hat_Latn"
,
"kbp_Latn"
,
"luo_Latn"
,
"ory_Orya"
,
"sna_Latn"
,
"tso_Latn"
,
"azj_Latn"
,
"dik_Latn"
,
"hau_Latn"
,
"kea_Latn"
,
"lus_Latn"
,
"pag_Latn"
,
"snd_Arab"
,
"tuk_Latn"
,
"bak_Cyrl"
,
"dyu_Latn"
,
"heb_Hebr"
,
"khk_Cyrl"
,
"lvs_Latn"
,
"pan_Guru"
,
"som_Latn"
,
"tum_Latn"
,
]
# Every position-wise pair of entries from _LANGUAGES, emitted once each as
# (earlier, later) in list order.
LANGUAGE_PAIRS = list(itertools.combinations(_LANGUAGES, 2))
# Codes that are allowed to appear in a generated translation pair.
LANGUAGES_OF_INTEREST = [
    "cat_Latn",
    "spa_Latn",
    "eng_Latn",
    "glg_Latn",
    "eus_Latn",
    "ita_Latn",
    "deu_Latn",
    "por_Latn",
    "fra_Latn",
]

# Code that must be one side of every generated pair.
MAIN_LANG = "eus_Latn"
# Narrow the candidate pairs to those that include MAIN_LANG and whose two
# members are both listed in LANGUAGES_OF_INTEREST.
LANGUAGE_PAIRS = [
    pair
    for pair in LANGUAGE_PAIRS
    if all(code in LANGUAGES_OF_INTEREST for code in pair) and MAIN_LANG in pair
]
# auxiliary functions
def code_to_language_name(code):
    """Return the display name of the language component of *code*.

    The code is looked up with ``Language.get``; only its ``"language"``
    component is kept when building the ``Language`` whose
    ``display_name()`` is returned.
    """
    base_language = Language.get(code)["language"]
    return Language.make(language=base_language).display_name()
def code_to_short_name(code):
    """Return the ``"language"`` component that ``Language.get`` yields for *code*."""
    parsed = Language.get(code)
    return parsed["language"]
def jinja_var(s):
    """Return *s* surrounded by ``{{`` and ``}}``."""
    # wrapper to avoid having to escape { } in format strings
    return "".join(("{{", s, "}}"))
def doc_to_text(src: str, tgt: str) -> str:
    """Build the two-line prompt template for translating *src* into *tgt*.

    The first line names the source language and embeds the template
    variable ``sentence_<src>``; the second line names the target language
    and is left open for the model to complete.
    """
    src_name = code_to_language_name(src)
    tgt_name = code_to_language_name(tgt)
    source_line = f"{src_name} sentence: {jinja_var('sentence_' + src)}"
    return f"{source_line}\n{tgt_name} sentence:"
def doc_to_target(tgt: str) -> str:
    """Return the template variable ``sentence_<tgt>`` wrapped by ``jinja_var``."""
    field_name = "sentence_" + tgt
    return jinja_var(field_name)
# main function
def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
    """
    Write one task YAML per translation direction of every LANGUAGE_PAIRS entry.

    Each pair produces two files, ``flores_<src>-<tgt>.yaml`` and the reverse
    direction, named with the short codes from ``code_to_short_name``.

    Args:
        output_dir: Directory the YAML files are written into.
        overwrite: When true, existing files are replaced.  Otherwise an
            existing file is left as it is and reported at the end.

    Raises:
        FileExistsError: Raised after all pairs have been processed when
            ``overwrite`` is false and at least one file already existed.
    """
    # Mode "x" makes open() fail with FileExistsError instead of truncating.
    open_mode = "w" if overwrite else "x"
    already_present = []

    for first, second in LANGUAGE_PAIRS:
        # do both translation directions for each lang pair
        for src, tgt in ((first, second), (second, first)):
            lang_pair_name = f"{code_to_short_name(src)}-{code_to_short_name(tgt)}"
            yaml_file_name = f"flores_{lang_pair_name}.yaml"
            target_path = f"{output_dir}/{yaml_file_name}"

            try:
                with open(target_path, open_mode, encoding="utf-8") as outfile:
                    print(f"Creating {yaml_file_name}...")
                    outfile.write("# File generated by `create-yamls.py`\n")
                    task_config = {
                        # "group": [f"{BENCH_NAME}_bench", f"{BENCH_NAME}_bench_flores"],
                        # "group": "flores_eu",
                        "include": "_flores_common_yaml",
                        "task": f"flores_{lang_pair_name}",
                        "doc_to_text": doc_to_text(src, tgt),
                        "doc_to_target": doc_to_target(tgt),
                    }
                    # sort_keys=False keeps the keys in the order written above.
                    yaml.dump(task_config, outfile, sort_keys=False)
            except FileExistsError:
                already_present.append(yaml_file_name)

    if already_present:
        raise FileExistsError(
            "Files were not created because they already exist:"
            f" {', '.join(already_present)}"
            "\nUse flag --overwrite to overwrite them."
        )
def main() -> None:
    """Read ``--overwrite`` and ``--output-dir`` from the command line and generate the YAMLs."""
    cli = argparse.ArgumentParser()
    # store_true already defaults to False when the flag is absent.
    cli.add_argument(
        "--overwrite",
        action="store_true",
        help="Overwrite files if they already exist",
    )
    cli.add_argument(
        "--output-dir",
        default=".",
        help="Directory to write yaml files to",
    )
    options = cli.parse_args()

    gen_lang_yamls(output_dir=options.output_dir, overwrite=options.overwrite)


if __name__ == "__main__":
    main()
lm_eval/tasks/basque_bench/flores_eu/flores_ca-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_ca-eu
doc_to_text
:
'
Catalan
sentence:
{{sentence_cat_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_de-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_de-eu
doc_to_text
:
'
German
sentence:
{{sentence_deu_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_en-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_en-eu
doc_to_text
:
'
English
sentence:
{{sentence_eng_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_es-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_es-eu
doc_to_text
:
'
Spanish
sentence:
{{sentence_spa_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-ca.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-ca
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
Catalan
sentence:'
doc_to_target
:
'
{{sentence_cat_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-de.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-de
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
German
sentence:'
doc_to_target
:
'
{{sentence_deu_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-en.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-en
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
English
sentence:'
doc_to_target
:
'
{{sentence_eng_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-es.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-es
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
Spanish
sentence:'
doc_to_target
:
'
{{sentence_spa_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-fr.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-fr
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
French
sentence:'
doc_to_target
:
'
{{sentence_fra_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-gl.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-gl
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
Galician
sentence:'
doc_to_target
:
'
{{sentence_glg_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-it.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-it
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
Italian
sentence:'
doc_to_target
:
'
{{sentence_ita_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu-pt.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_eu-pt
doc_to_text
:
'
Basque
sentence:
{{sentence_eus_Latn}}
Portuguese
sentence:'
doc_to_target
:
'
{{sentence_por_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_eu.yaml
0 → 100644
View file @
25869601
group
:
flores_eu
task
:
-
flores_es-eu
-
flores_eu-es
-
flores_en-eu
-
flores_eu-en
-
flores_eu-pt
-
flores_pt-eu
-
flores_eu-it
-
flores_it-eu
-
flores_eu-fr
-
flores_fr-eu
-
flores_eu-ca
-
flores_ca-eu
-
flores_eu-gl
-
flores_gl-eu
-
flores_eu-de
-
flores_de-eu
aggregate_metric_list
:
-
metric
:
bleu
aggregation
:
mean
weight_by_size
:
false
metadata
:
version
:
1.0
lm_eval/tasks/basque_bench/flores_eu/flores_fr-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_fr-eu
doc_to_text
:
'
French
sentence:
{{sentence_fra_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_gl-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_gl-eu
doc_to_text
:
'
Galician
sentence:
{{sentence_glg_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_it-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_it-eu
doc_to_text
:
'
Italian
sentence:
{{sentence_ita_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/flores_eu/flores_pt-eu.yaml
0 → 100644
View file @
25869601
# File generated by `create-yamls.py`
include
:
_flores_common_yaml
task
:
flores_pt-eu
doc_to_text
:
'
Portuguese
sentence:
{{sentence_por_Latn}}
Basque
sentence:'
doc_to_target
:
'
{{sentence_eus_Latn}}'
lm_eval/tasks/basque_bench/mgsm_cot_native_eu.yaml
0 → 100644
View file @
25869601
task
:
mgsm_native_cot_eu
dataset_path
:
HiTZ/MGSM-eu
dataset_name
:
null
doc_to_target
:
'
{%
if
answer
is
not
none
%}{{answer[27:]}}{%
else
%}{{answer_number|string}}{%endif
%}'
doc_to_text
:
'
{%
if
answer
is
not
none
%}{{question+"\nErantzuna
urratsez
urrats:"}}{%
else
%}{{"Galdera:
"+question+"\nErantzuna
urratsez
urrats:"}}{%
endif
%}'
output_type
:
generate_until
training_split
:
train
test_split
:
test
target_delimiter
:
"
"
generation_kwargs
:
until
:
-
"
\n\n
"
-
"
\n
"
-
"
Galdera:"
-
</s>
-
<|im_end|>
do_sample
:
false
temperature
:
0.0
filter_list
:
-
name
:
"
get-answer"
filter
:
-
function
:
"
regex"
regex_pattern
:
"
Erantzuna
[$%]?
?(-?[0-9]+([
.,][0-9.,]+)?)
?[$%]?
da"
-
function
:
"
take_first"
metric_list
:
-
metric
:
exact_match
aggregation
:
mean
higher_is_better
:
true
ignore_case
:
true
ignore_punctuation
:
true
regexes_to_ignore
:
-
"
"
metadata
:
version
:
1.0
Prev
1
2
3
4
5
6
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment