Commit 741a6a69 authored by lintangsutawika
Browse files

Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into mela

parents 494a4515 b536f067
---
# Shared Belebele task settings: a four-option multiple-choice prompt built
# from a passage (P), a question (Q) and the answers A-D.
group: belebele
dataset_path: facebook/belebele
fewshot_config:
  # Must be nested: at the parent's indent this key would become a separate
  # top-level entry and fewshot_config itself would load as null.
  sampler: first_n
output_type: multiple_choice
should_decontaminate: true
doc_to_decontamination_query: "{{question}}"
# The prompt is defined exactly once and ends in "Answer:". Repeating the key
# is invalid YAML; most parsers would silently keep only the last definition.
doc_to_text: "P: {{flores_passage}}\nQ: {{question.strip()}}\nA: {{mc_answer1}}\nB: {{mc_answer2}}\nC: {{mc_answer3}}\nD: {{mc_answer4}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
# Looks up correct_answer_num in ['1', '2', '3', '4'], yielding a 0-based
# index (presumably the position in doc_to_choice; confirm with the harness).
doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
metric_list:
......
......@@ -65,3 +65,36 @@ if __name__ == "__main__":
allow_unicode=True,
default_style='"',
)
# Emit the group-level config that lists every per-language task.
# An empty task prefix yields the plain "belebele" / "belebele_<lang>" names.
if args.task_prefix != "":
    group_name = f"belebele_{args.task_prefix}"
    member_tasks = [
        f"belebele_{args.task_prefix}_{lang}"
        for lang in languages
        if "default" not in lang
    ]
else:
    group_name = "belebele"
    member_tasks = [
        f"belebele_{lang}" for lang in languages if "default" not in lang
    ]

group_yaml_dict = {
    "group": group_name,
    "task": member_tasks,
    # Both accuracy metrics are aggregated the same way: an unweighted mean.
    "aggregate_metric_list": [
        {"metric": metric_name, "aggregation": "mean", "weight_by_size": False}
        for metric_name in ("acc", "acc_norm")
    ],
    "metadata": {"version": 0.0},
}

# The output file name is "_" + save prefix + task prefix + ".yaml".
file_save_path = f"_{args.save_prefix_path}{args.task_prefix}.yaml"

# Same dump settings as before: unlimited line width, raw unicode output,
# and every scalar emitted in double-quoted style.
dump_options = {
    "width": float("inf"),
    "allow_unicode": True,
    "default_style": '"',
}
with open(file_save_path, "w", encoding="utf-8") as out_file:
    yaml.dump(group_yaml_dict, out_file, **dump_options)
---
# belebele_acm_Arab: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: acm_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_acm_Arab
test_split: test
---
# belebele_afr_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: afr_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_afr_Latn
test_split: test
---
# belebele_als_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: als_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_als_Latn
test_split: test
---
# belebele_amh_Ethi: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: amh_Ethi
fewshot_split: test
include: _default_template_yaml
task: belebele_amh_Ethi
test_split: test
---
# belebele_apc_Arab: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: apc_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_apc_Arab
test_split: test
---
# belebele_arb_Arab: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: arb_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_arb_Arab
test_split: test
---
# belebele_arb_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: arb_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_arb_Latn
test_split: test
---
# belebele_ars_Arab: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: ars_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_ars_Arab
test_split: test
---
# belebele_ary_Arab: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: ary_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_ary_Arab
test_split: test
---
# belebele_arz_Arab: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: arz_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_arz_Arab
test_split: test
---
# belebele_asm_Beng: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: asm_Beng
fewshot_split: test
include: _default_template_yaml
task: belebele_asm_Beng
test_split: test
---
# belebele_azj_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: azj_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_azj_Latn
test_split: test
---
# belebele_bam_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: bam_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_bam_Latn
test_split: test
---
# belebele_ben_Beng: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: ben_Beng
fewshot_split: test
include: _default_template_yaml
task: belebele_ben_Beng
test_split: test
---
# belebele_ben_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: ben_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_ben_Latn
test_split: test
---
# belebele_bod_Tibt: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: bod_Tibt
fewshot_split: test
include: _default_template_yaml
task: belebele_bod_Tibt
test_split: test
---
# belebele_bul_Cyrl: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: bul_Cyrl
fewshot_split: test
include: _default_template_yaml
task: belebele_bul_Cyrl
test_split: test
---
# belebele_cat_Latn: per-language task entry; shared settings are pulled in
# through `include`. Every key is defined exactly once.
dataset_name: cat_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_cat_Latn
test_split: test
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment