Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b95ef9b1
Commit
b95ef9b1
authored
Oct 09, 2025
by
Baber
Browse files
add alias
parent
6ace93ae
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
202 additions
and
98 deletions
+202
-98
lm_eval/tasks/mrl/mc/_generate_config.py
lm_eval/tasks/mrl/mc/_generate_config.py
+8
-1
lm_eval/tasks/mrl/mc/_global_piqa.yaml
lm_eval/tasks/mrl/mc/_global_piqa.yaml
+194
-97
No files found.
lm_eval/tasks/mrl/mc/_generate_config.py
View file @
b95ef9b1
...
@@ -22,10 +22,17 @@ with open(PARENT / "_global_piqa.yaml", "w") as f:
...
@@ -22,10 +22,17 @@ with open(PARENT / "_global_piqa.yaml", "w") as f:
f
.
write
(
"group: global_piqa
\n
"
)
f
.
write
(
"group: global_piqa
\n
"
)
f
.
write
(
"task:
\n
"
)
f
.
write
(
"task:
\n
"
)
for
s
in
subsets
:
for
s
in
subsets
:
f
.
write
(
f
" - mrl_
{
s
}
\n
"
)
f
.
write
(
f
" - task: mrl_
{
s
}
\n
"
)
f
.
write
(
f
" task_alias:
{
s
}
\n
"
)
f
.
write
(
"aggregate_metric_list:
\n
"
)
f
.
write
(
"aggregate_metric_list:
\n
"
)
f
.
write
(
" - metric: acc
\n
"
)
f
.
write
(
" - metric: acc
\n
"
)
f
.
write
(
" aggregation: mean
\n
"
)
f
.
write
(
" aggregation: mean
\n
"
)
f
.
write
(
" weight_by_size: true
\n
"
)
f
.
write
(
" weight_by_size: true
\n
"
)
f
.
write
(
" - metric: acc_norm
\n
"
)
f
.
write
(
" aggregation: mean
\n
"
)
f
.
write
(
" weight_by_size: true
\n
"
)
f
.
write
(
" - metric: acc_bytes
\n
"
)
f
.
write
(
" aggregation: mean
\n
"
)
f
.
write
(
" weight_by_size: true
\n
"
)
f
.
write
(
"metadata:
\n
"
)
f
.
write
(
"metadata:
\n
"
)
f
.
write
(
" version: 1.0
\n
"
)
f
.
write
(
" version: 1.0
\n
"
)
lm_eval/tasks/mrl/mc/_global_piqa.yaml
View file @
b95ef9b1
group
:
global_piqa
group
:
global_piqa
task
:
task
:
-
mrl_acq_arab
-
task
:
mrl_acq_arab
-
mrl_aeb_arab
task_alias
:
acq_arab
-
mrl_afb_arab
-
task
:
mrl_aeb_arab
-
mrl_als_latn
task_alias
:
aeb_arab
-
mrl_amh_ethi
-
task
:
mrl_afb_arab
-
mrl_apc_arab_jord
task_alias
:
afb_arab
-
mrl_apc_arab_leba
-
task
:
mrl_als_latn
-
mrl_apc_arab_pale
task_alias
:
als_latn
-
mrl_apc_arab_syri
-
task
:
mrl_amh_ethi
-
mrl_arb_arab
task_alias
:
amh_ethi
-
mrl_arq_arab
-
task
:
mrl_apc_arab_jord
-
mrl_ars_arab
task_alias
:
apc_arab_jord
-
mrl_ary_arab
-
task
:
mrl_apc_arab_leba
-
mrl_arz_arab
task_alias
:
apc_arab_leba
-
mrl_azj_latn
-
task
:
mrl_apc_arab_pale
-
mrl_bam_latn
task_alias
:
apc_arab_pale
-
mrl_bel_cyrl
-
task
:
mrl_apc_arab_syri
-
mrl_ben_latn
task_alias
:
apc_arab_syri
-
mrl_bho_deva
-
task
:
mrl_arb_arab
-
mrl_bsk_arab
task_alias
:
arb_arab
-
mrl_bul_cyrl
-
task
:
mrl_arq_arab
-
mrl_cat_latn
task_alias
:
arq_arab
-
mrl_ces_latn
-
task
:
mrl_ars_arab
-
mrl_ckb_arab
task_alias
:
ars_arab
-
mrl_ckm_latn
-
task
:
mrl_ary_arab
-
mrl_cmn_hans
task_alias
:
ary_arab
-
mrl_cmn_hant
-
task
:
mrl_arz_arab
-
mrl_dhd_deva
task_alias
:
arz_arab
-
mrl_ell_grek
-
task
:
mrl_azj_latn
-
mrl_eng_latn
task_alias
:
azj_latn
-
mrl_est_latn
-
task
:
mrl_bam_latn
-
mrl_fao_latn
task_alias
:
bam_latn
-
mrl_fin_latn
-
task
:
mrl_bel_cyrl
-
mrl_fra_latn_cana
task_alias
:
bel_cyrl
-
mrl_fra_latn_fran
-
task
:
mrl_ben_latn
-
mrl_glg_latn
task_alias
:
ben_latn
-
mrl_guj_gujr
-
task
:
mrl_bho_deva
-
mrl_hau_latn
task_alias
:
bho_deva
-
mrl_haw_latn
-
task
:
mrl_bsk_arab
-
mrl_heb_hebr
task_alias
:
bsk_arab
-
mrl_hrv_latn
-
task
:
mrl_bul_cyrl
-
mrl_hun_latn
task_alias
:
bul_cyrl
-
mrl_hye_armn
-
task
:
mrl_cat_latn
-
mrl_ibo_latn
task_alias
:
cat_latn
-
mrl_idu_latn
-
task
:
mrl_ces_latn
-
mrl_ind_latn
task_alias
:
ces_latn
-
mrl_isl_latn
-
task
:
mrl_ckb_arab
-
mrl_iso_latn
task_alias
:
ckb_arab
-
mrl_ita_latn
-
task
:
mrl_ckm_latn
-
mrl_jav_latn
task_alias
:
ckm_latn
-
mrl_jpn_jpan
-
task
:
mrl_cmn_hans
-
mrl_kat_geor
task_alias
:
cmn_hans
-
mrl_kaz_cyrl
-
task
:
mrl_cmn_hant
-
mrl_kir_cyrl
task_alias
:
cmn_hant
-
mrl_kor_hang
-
task
:
mrl_dhd_deva
-
mrl_lit_latn
task_alias
:
dhd_deva
-
mrl_mar_deva
-
task
:
mrl_ell_grek
-
mrl_mkd_cyrl
task_alias
:
ell_grek
-
mrl_mni_beng
-
task
:
mrl_eng_latn
-
mrl_nag_latn
task_alias
:
eng_latn
-
mrl_nld_latn
-
task
:
mrl_est_latn
-
mrl_nno_latn
task_alias
:
est_latn
-
mrl_nob_latn
-
task
:
mrl_fao_latn
-
mrl_npi_deva
task_alias
:
fao_latn
-
mrl_pcm_latn
-
task
:
mrl_fin_latn
-
mrl_pes_arab
task_alias
:
fin_latn
-
mrl_pol_latn
-
task
:
mrl_fra_latn_cana
-
mrl_por_latn_braz
task_alias
:
fra_latn_cana
-
mrl_por_latn_port
-
task
:
mrl_fra_latn_fran
-
mrl_ron_latn
task_alias
:
fra_latn_fran
-
mrl_rwr_deva
-
task
:
mrl_glg_latn
-
mrl_sin_sinh
task_alias
:
glg_latn
-
mrl_slk_latn
-
task
:
mrl_guj_gujr
-
mrl_slk_latn_sari
task_alias
:
guj_gujr
-
mrl_slv_latn
-
task
:
mrl_hau_latn
-
mrl_slv_latn_cerk
task_alias
:
hau_latn
-
mrl_snd_arab
-
task
:
mrl_haw_latn
-
mrl_snd_deva
task_alias
:
haw_latn
-
mrl_spa_latn_peru
-
task
:
mrl_heb_hebr
-
mrl_srp_cyrl
task_alias
:
heb_hebr
-
mrl_srp_latn
-
task
:
mrl_hrv_latn
-
mrl_swe_latn
task_alias
:
hrv_latn
-
mrl_tam_taml
-
task
:
mrl_hun_latn
-
mrl_tgl_latn
task_alias
:
hun_latn
-
mrl_tha_thai
-
task
:
mrl_hye_armn
-
mrl_tur_latn
task_alias
:
hye_armn
-
mrl_uig_arab
-
task
:
mrl_ibo_latn
-
mrl_ukr_cyrl
task_alias
:
ibo_latn
-
mrl_urd_arab
-
task
:
mrl_idu_latn
-
mrl_urd_latn
task_alias
:
idu_latn
-
mrl_urh_latn
-
task
:
mrl_ind_latn
-
mrl_uzn_latn
task_alias
:
ind_latn
-
mrl_vie_latn
-
task
:
mrl_isl_latn
-
mrl_yor_latn
task_alias
:
isl_latn
-
mrl_yue_hant
-
task
:
mrl_iso_latn
-
mrl_zsm_latn
task_alias
:
iso_latn
-
mrl_zul_latn
-
task
:
mrl_ita_latn
task_alias
:
ita_latn
-
task
:
mrl_jav_latn
task_alias
:
jav_latn
-
task
:
mrl_jpn_jpan
task_alias
:
jpn_jpan
-
task
:
mrl_kat_geor
task_alias
:
kat_geor
-
task
:
mrl_kaz_cyrl
task_alias
:
kaz_cyrl
-
task
:
mrl_kir_cyrl
task_alias
:
kir_cyrl
-
task
:
mrl_kor_hang
task_alias
:
kor_hang
-
task
:
mrl_lit_latn
task_alias
:
lit_latn
-
task
:
mrl_mar_deva
task_alias
:
mar_deva
-
task
:
mrl_mkd_cyrl
task_alias
:
mkd_cyrl
-
task
:
mrl_mni_beng
task_alias
:
mni_beng
-
task
:
mrl_nag_latn
task_alias
:
nag_latn
-
task
:
mrl_nld_latn
task_alias
:
nld_latn
-
task
:
mrl_nno_latn
task_alias
:
nno_latn
-
task
:
mrl_nob_latn
task_alias
:
nob_latn
-
task
:
mrl_npi_deva
task_alias
:
npi_deva
-
task
:
mrl_pcm_latn
task_alias
:
pcm_latn
-
task
:
mrl_pes_arab
task_alias
:
pes_arab
-
task
:
mrl_pol_latn
task_alias
:
pol_latn
-
task
:
mrl_por_latn_braz
task_alias
:
por_latn_braz
-
task
:
mrl_por_latn_port
task_alias
:
por_latn_port
-
task
:
mrl_ron_latn
task_alias
:
ron_latn
-
task
:
mrl_rwr_deva
task_alias
:
rwr_deva
-
task
:
mrl_sin_sinh
task_alias
:
sin_sinh
-
task
:
mrl_slk_latn
task_alias
:
slk_latn
-
task
:
mrl_slk_latn_sari
task_alias
:
slk_latn_sari
-
task
:
mrl_slv_latn
task_alias
:
slv_latn
-
task
:
mrl_slv_latn_cerk
task_alias
:
slv_latn_cerk
-
task
:
mrl_snd_arab
task_alias
:
snd_arab
-
task
:
mrl_snd_deva
task_alias
:
snd_deva
-
task
:
mrl_spa_latn_peru
task_alias
:
spa_latn_peru
-
task
:
mrl_srp_cyrl
task_alias
:
srp_cyrl
-
task
:
mrl_srp_latn
task_alias
:
srp_latn
-
task
:
mrl_swe_latn
task_alias
:
swe_latn
-
task
:
mrl_tam_taml
task_alias
:
tam_taml
-
task
:
mrl_tgl_latn
task_alias
:
tgl_latn
-
task
:
mrl_tha_thai
task_alias
:
tha_thai
-
task
:
mrl_tur_latn
task_alias
:
tur_latn
-
task
:
mrl_uig_arab
task_alias
:
uig_arab
-
task
:
mrl_ukr_cyrl
task_alias
:
ukr_cyrl
-
task
:
mrl_urd_arab
task_alias
:
urd_arab
-
task
:
mrl_urd_latn
task_alias
:
urd_latn
-
task
:
mrl_urh_latn
task_alias
:
urh_latn
-
task
:
mrl_uzn_latn
task_alias
:
uzn_latn
-
task
:
mrl_vie_latn
task_alias
:
vie_latn
-
task
:
mrl_yor_latn
task_alias
:
yor_latn
-
task
:
mrl_yue_hant
task_alias
:
yue_hant
-
task
:
mrl_zsm_latn
task_alias
:
zsm_latn
-
task
:
mrl_zul_latn
task_alias
:
zul_latn
aggregate_metric_list
:
aggregate_metric_list
:
-
metric
:
acc
-
metric
:
acc
aggregation
:
mean
aggregation
:
mean
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment