Commit c8693599 authored by haileyschoelkopf
Browse files

add more groupings

parent c171fa30
group: belebele
task:
- belebele_acm_Arab
- belebele_arz_Arab
- belebele_ceb_Latn
- belebele_fin_Latn
- belebele_hin_Deva
- belebele_ita_Latn
- belebele_khm_Khmr
- belebele_lvs_Latn
- belebele_npi_Deva
- belebele_pol_Latn
- belebele_slv_Latn
- belebele_swe_Latn
- belebele_tso_Latn
- belebele_xho_Latn
- belebele_afr_Latn
- belebele_asm_Beng
- belebele_ces_Latn
- belebele_fra_Latn
- belebele_hin_Latn
- belebele_jav_Latn
- belebele_kin_Latn
- belebele_mal_Mlym
- belebele_npi_Latn
- belebele_por_Latn
- belebele_sna_Latn
- belebele_swh_Latn
- belebele_tur_Latn
- belebele_yor_Latn
- belebele_als_Latn
- belebele_azj_Latn
- belebele_ckb_Arab
- belebele_fuv_Latn
- belebele_hrv_Latn
- belebele_jpn_Jpan
- belebele_kir_Cyrl
- belebele_mar_Deva
- belebele_nso_Latn
- belebele_snd_Arab
- belebele_tam_Taml
- belebele_ukr_Cyrl
- belebele_zho_Hans
- belebele_amh_Ethi
- belebele_bam_Latn
- belebele_dan_Latn
- belebele_gaz_Latn
- belebele_hun_Latn
- belebele_kac_Latn
- belebele_kor_Hang
- belebele_mkd_Cyrl
- belebele_nya_Latn
- belebele_ron_Latn
- belebele_som_Latn
- belebele_tel_Telu
- belebele_urd_Arab
- belebele_zho_Hant
- belebele_apc_Arab
- belebele_ben_Beng
- belebele_deu_Latn
- belebele_grn_Latn
- belebele_hye_Armn
- belebele_kan_Knda
- belebele_lao_Laoo
- belebele_mlt_Latn
- belebele_ory_Orya
- belebele_rus_Cyrl
- belebele_sot_Latn
- belebele_tgk_Cyrl
- belebele_urd_Latn
- belebele_zsm_Latn
- belebele_arb_Arab
- belebele_ben_Latn
- belebele_ell_Grek
- belebele_guj_Gujr
- belebele_ibo_Latn
- belebele_kat_Geor
- belebele_lin_Latn
- belebele_mri_Latn
- belebele_pan_Guru
- belebele_shn_Mymr
- belebele_spa_Latn
- belebele_tgl_Latn
- belebele_uzn_Latn
- belebele_zul_Latn
- belebele_arb_Latn
- belebele_bod_Tibt
- belebele_eng_Latn
- belebele_hat_Latn
- belebele_ilo_Latn
- belebele_kaz_Cyrl
- belebele_lit_Latn
- belebele_mya_Mymr
- belebele_pbt_Arab
- belebele_sin_Latn
- belebele_srp_Cyrl
- belebele_tha_Thai
- belebele_vie_Latn
- belebele_ars_Arab
- belebele_bul_Cyrl
- belebele_est_Latn
- belebele_hau_Latn
- belebele_ind_Latn
- belebele_kea_Latn
- belebele_lug_Latn
- belebele_nld_Latn
- belebele_pes_Arab
- belebele_sin_Sinh
- belebele_ssw_Latn
- belebele_tir_Ethi
- belebele_war_Latn
- belebele_ary_Arab
- belebele_cat_Latn
- belebele_eus_Latn
- belebele_heb_Hebr
- belebele_isl_Latn
- belebele_khk_Cyrl
- belebele_luo_Latn
- belebele_nob_Latn
- belebele_plt_Latn
- belebele_slk_Latn
- belebele_sun_Latn
- belebele_tsn_Latn
- belebele_wol_Latn
- belebele_acm_Arab
- belebele_arz_Arab
- belebele_ceb_Latn
- belebele_fin_Latn
- belebele_hin_Deva
- belebele_ita_Latn
- belebele_khm_Khmr
- belebele_lvs_Latn
- belebele_npi_Deva
- belebele_pol_Latn
- belebele_slv_Latn
- belebele_swe_Latn
- belebele_tso_Latn
- belebele_xho_Latn
- belebele_afr_Latn
- belebele_asm_Beng
- belebele_ces_Latn
- belebele_fra_Latn
- belebele_hin_Latn
- belebele_jav_Latn
- belebele_kin_Latn
- belebele_mal_Mlym
- belebele_npi_Latn
- belebele_por_Latn
- belebele_sna_Latn
- belebele_swh_Latn
- belebele_tur_Latn
- belebele_yor_Latn
- belebele_als_Latn
- belebele_azj_Latn
- belebele_ckb_Arab
- belebele_fuv_Latn
- belebele_hrv_Latn
- belebele_jpn_Jpan
- belebele_kir_Cyrl
- belebele_mar_Deva
- belebele_nso_Latn
- belebele_snd_Arab
- belebele_tam_Taml
- belebele_ukr_Cyrl
- belebele_zho_Hans
- belebele_amh_Ethi
- belebele_bam_Latn
- belebele_dan_Latn
- belebele_gaz_Latn
- belebele_hun_Latn
- belebele_kac_Latn
- belebele_kor_Hang
- belebele_mkd_Cyrl
- belebele_nya_Latn
- belebele_ron_Latn
- belebele_som_Latn
- belebele_tel_Telu
- belebele_urd_Arab
- belebele_zho_Hant
- belebele_apc_Arab
- belebele_ben_Beng
- belebele_deu_Latn
- belebele_grn_Latn
- belebele_hye_Armn
- belebele_kan_Knda
- belebele_lao_Laoo
- belebele_mlt_Latn
- belebele_ory_Orya
- belebele_rus_Cyrl
- belebele_sot_Latn
- belebele_tgk_Cyrl
- belebele_urd_Latn
- belebele_zsm_Latn
- belebele_arb_Arab
- belebele_ben_Latn
- belebele_ell_Grek
- belebele_guj_Gujr
- belebele_ibo_Latn
- belebele_kat_Geor
- belebele_lin_Latn
- belebele_mri_Latn
- belebele_pan_Guru
- belebele_shn_Mymr
- belebele_spa_Latn
- belebele_tgl_Latn
- belebele_uzn_Latn
- belebele_zul_Latn
- belebele_arb_Latn
- belebele_bod_Tibt
- belebele_eng_Latn
- belebele_hat_Latn
- belebele_ilo_Latn
- belebele_kaz_Cyrl
- belebele_lit_Latn
- belebele_mya_Mymr
- belebele_pbt_Arab
- belebele_sin_Latn
- belebele_srp_Cyrl
- belebele_tha_Thai
- belebele_vie_Latn
- belebele_ars_Arab
- belebele_bul_Cyrl
- belebele_est_Latn
- belebele_hau_Latn
- belebele_ind_Latn
- belebele_kea_Latn
- belebele_lug_Latn
- belebele_nld_Latn
- belebele_pes_Arab
- belebele_sin_Sinh
- belebele_ssw_Latn
- belebele_tir_Ethi
- belebele_war_Latn
- belebele_ary_Arab
- belebele_cat_Latn
- belebele_eus_Latn
- belebele_heb_Hebr
- belebele_isl_Latn
- belebele_khk_Cyrl
- belebele_luo_Latn
- belebele_nob_Latn
- belebele_plt_Latn
- belebele_slk_Latn
- belebele_sun_Latn
- belebele_tsn_Latn
- belebele_wol_Latn
aggregate_metric_list:
- aggregation: mean
- aggregation: mean
metric: acc
weight_by_size: true
- aggregation: mean
- aggregation: mean
metric: acc_norm
weight_by_size: true
metadata:
......
group: advanced_ai_risk
tag: advanced_ai_risk
dataset_path: EleutherAI/advanced_ai_risk
output_type: multiple_choice
validation_split: validation
......
group: persona
tag: persona
dataset_path: EleutherAI/persona
output_type: multiple_choice
validation_split: validation
......
group: sycophancy
tag: sycophancy
task: sycophancy_on_nlp_survey
dataset_path: EleutherAI/sycophancy
dataset_name: sycophancy_on_nlp_survey
......
group: sycophancy
tag: sycophancy
task: sycophancy_on_philpapers2020
dataset_path: EleutherAI/sycophancy
dataset_name: sycophancy_on_philpapers2020
......
group: sycophancy
tag: sycophancy
task: sycophancy_on_political_typology_quiz
dataset_path: EleutherAI/sycophancy
dataset_name: sycophancy_on_political_typology_quiz
......
group: winogenerated
dataset_path: EleutherAI/winogenerated
output_type: multiple_choice
validation_split: validation
target_delimiter: ""
doc_to_text: "{{question}}"
doc_to_target: 0
doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
metric_list:
- metric: acc
metadata:
version: 0.0
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'Arabic phrase: {{translation["ar"]}}
English phrase:'
group:
- generate_until
tag:
- translation
- iwslt2017
include: wmt_common_yaml
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["ar"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}
Arabic phrase:'
group:
- generate_until
tag:
- translation
- iwslt2017
include: wmt_common_yaml
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["fr"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}
French phrase:'
group:
- generate_until
tag:
- translation
- wmt14
- gpt3_translation_benchmarks
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'French phrase: {{translation["fr"]}}
English phrase:'
group:
- generate_until
tag:
- translation
- wmt14
- gpt3_translation_benchmarks
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'German phrase: {{translation["de"]}}
English phrase:'
group:
- generate_until
tag:
- translation
- wmt16
- gpt3_translation_benchmarks
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["de"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}
German phrase:'
group:
- generate_until
tag:
- translation
- wmt16
- gpt3_translation_benchmarks
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["ro"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}
Romanian phrase:'
group:
- generate_until
tag:
- translation
- wmt16
- gpt3_translation_benchmarks
......
......@@ -5,8 +5,7 @@ doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'Romanian phrase: {{translation["ro"]}}
English phrase:'
group:
- generate_until
tag:
- translation
- wmt16
- gpt3_translation_benchmarks
......
group:
tag:
- truthfulqa
task: truthfulqa_gen
dataset_path: truthful_qa
......
group:
tag:
- truthfulqa
task: truthfulqa_mc1
dataset_path: truthful_qa
......
group:
tag:
- unscramble
task: anagrams1
dataset_path: EleutherAI/unscramble
......
group:
tag:
- unscramble
task: anagrams2
dataset_path: EleutherAI/unscramble
......
group:
tag:
- unscramble
task: cycle_letters
dataset_path: EleutherAI/unscramble
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment