Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
46e8c8e6
Commit
46e8c8e6
authored
Jun 21, 2024
by
haileyschoelkopf
Browse files
add more explicit aggregation groups
parent
a382359c
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
166 additions
and
2 deletions
+166
-2
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
+0
-1
lm_eval/tasks/belebele/_belebele.yaml
lm_eval/tasks/belebele/_belebele.yaml
+133
-0
lm_eval/tasks/belebele/_default_template_yaml
lm_eval/tasks/belebele/_default_template_yaml
+0
-1
lm_eval/tasks/belebele/_generate_configs.py
lm_eval/tasks/belebele/_generate_configs.py
+33
-0
No files found.
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
View file @
46e8c8e6
group: bbh_zeroshot
dataset_path: lukaemon/bbh
output_type: generate_until
test_split: test
...
...
lm_eval/tasks/belebele/_belebele.yaml
0 → 100644
View file @
46e8c8e6
# Group config for the "belebele" benchmark: lists every per-language
# belebele task and reports size-weighted mean acc / acc_norm over them.
group: belebele
task:
  - belebele_acm_Arab
  - belebele_arz_Arab
  - belebele_ceb_Latn
  - belebele_fin_Latn
  - belebele_hin_Deva
  - belebele_ita_Latn
  - belebele_khm_Khmr
  - belebele_lvs_Latn
  - belebele_npi_Deva
  - belebele_pol_Latn
  - belebele_slv_Latn
  - belebele_swe_Latn
  - belebele_tso_Latn
  - belebele_xho_Latn
  - belebele_afr_Latn
  - belebele_asm_Beng
  - belebele_ces_Latn
  - belebele_fra_Latn
  - belebele_hin_Latn
  - belebele_jav_Latn
  - belebele_kin_Latn
  - belebele_mal_Mlym
  - belebele_npi_Latn
  - belebele_por_Latn
  - belebele_sna_Latn
  - belebele_swh_Latn
  - belebele_tur_Latn
  - belebele_yor_Latn
  - belebele_als_Latn
  - belebele_azj_Latn
  - belebele_ckb_Arab
  - belebele_fuv_Latn
  - belebele_hrv_Latn
  - belebele_jpn_Jpan
  - belebele_kir_Cyrl
  - belebele_mar_Deva
  - belebele_nso_Latn
  - belebele_snd_Arab
  - belebele_tam_Taml
  - belebele_ukr_Cyrl
  - belebele_zho_Hans
  - belebele_amh_Ethi
  - belebele_bam_Latn
  - belebele_dan_Latn
  - belebele_gaz_Latn
  - belebele_hun_Latn
  - belebele_kac_Latn
  - belebele_kor_Hang
  - belebele_mkd_Cyrl
  - belebele_nya_Latn
  - belebele_ron_Latn
  - belebele_som_Latn
  - belebele_tel_Telu
  - belebele_urd_Arab
  - belebele_zho_Hant
  - belebele_apc_Arab
  - belebele_ben_Beng
  - belebele_deu_Latn
  - belebele_grn_Latn
  - belebele_hye_Armn
  - belebele_kan_Knda
  - belebele_lao_Laoo
  - belebele_mlt_Latn
  - belebele_ory_Orya
  - belebele_rus_Cyrl
  - belebele_sot_Latn
  - belebele_tgk_Cyrl
  - belebele_urd_Latn
  - belebele_zsm_Latn
  - belebele_arb_Arab
  - belebele_ben_Latn
  - belebele_ell_Grek
  - belebele_guj_Gujr
  - belebele_ibo_Latn
  - belebele_kat_Geor
  - belebele_lin_Latn
  - belebele_mri_Latn
  - belebele_pan_Guru
  - belebele_shn_Mymr
  - belebele_spa_Latn
  - belebele_tgl_Latn
  - belebele_uzn_Latn
  - belebele_zul_Latn
  - belebele_arb_Latn
  - belebele_bod_Tibt
  - belebele_eng_Latn
  - belebele_hat_Latn
  - belebele_ilo_Latn
  - belebele_kaz_Cyrl
  - belebele_lit_Latn
  - belebele_mya_Mymr
  - belebele_pbt_Arab
  - belebele_sin_Latn
  - belebele_srp_Cyrl
  - belebele_tha_Thai
  - belebele_vie_Latn
  - belebele_ars_Arab
  - belebele_bul_Cyrl
  - belebele_est_Latn
  - belebele_hau_Latn
  - belebele_ind_Latn
  - belebele_kea_Latn
  - belebele_lug_Latn
  - belebele_nld_Latn
  - belebele_pes_Arab
  - belebele_sin_Sinh
  - belebele_ssw_Latn
  - belebele_tir_Ethi
  - belebele_war_Latn
  - belebele_ary_Arab
  - belebele_cat_Latn
  - belebele_eus_Latn
  - belebele_heb_Hebr
  - belebele_isl_Latn
  - belebele_khk_Cyrl
  - belebele_luo_Latn
  - belebele_nob_Latn
  - belebele_plt_Latn
  - belebele_slk_Latn
  - belebele_sun_Latn
  - belebele_tsn_Latn
  - belebele_wol_Latn
aggregate_metric_list:
  - aggregation: mean
    metric: acc
    weight_by_size: true
  - aggregation: mean
    metric: acc_norm
    weight_by_size: true
metadata:
  version: 0.0
lm_eval/tasks/belebele/_default_template_yaml
View file @
46e8c8e6
group: belebele
dataset_path: facebook/belebele
fewshot_config:
sampler: first_n
...
...
lm_eval/tasks/belebele/_generate_configs.py
View file @
46e8c8e6
...
...
@@ -64,3 +64,36 @@ if __name__ == "__main__":
allow_unicode
=
True
,
default_style
=
'"'
,
)
# write group config out
# The group and each member task are named "belebele_<prefix>..." when a
# task prefix was supplied on the command line, and "belebele..." otherwise.
use_prefix = args.task_prefix != ""

group_name = f"belebele_{args.task_prefix}" if use_prefix else "belebele"

# One task name per language; entries containing "default" are left out.
member_tasks = [
    f"belebele_{args.task_prefix}_{lang}" if use_prefix else f"belebele_{lang}"
    for lang in languages
    if "default" not in lang
]

# Both metrics are aggregated identically across the member tasks.
# NOTE(review): the _belebele.yaml added in this same commit lists
# `weight_by_size: true` for both metrics, while this script writes False —
# confirm which value is intended.
metric_aggregations = [
    {"metric": metric_name, "aggregation": "mean", "weight_by_size": False}
    for metric_name in ("acc", "acc_norm")
]

group_yaml_dict = {
    "group": group_name,
    "task": member_tasks,
    "aggregate_metric_list": metric_aggregations,
    "metadata": {"version": 0.0},
}

# Output file name: "_" + save prefix + task prefix + ".yaml".
file_save_path = "_" + args.save_prefix_path + f"{args.task_prefix}.yaml"

# Infinite width is presumably there to stop PyYAML wrapping long lines;
# default_style='"' makes it emit scalars in double-quoted form.
dump_options = {
    "width": float("inf"),
    "allow_unicode": True,
    "default_style": '"',
}

with open(file_save_path, "w", encoding="utf-8") as group_yaml_file:
    yaml.dump(group_yaml_dict, group_yaml_file, **dump_options)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment