Merge branch 'main' of https://github.com/EleutherAI/lm-evaluation-harness into mela

741a6a69 · lintangsutawika · 494a4515 · b536f067 · 741a6a69 · 741a6a69
Commit 741a6a69 authored Aug 20, 2024 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/belebele/_default_template_yaml
+++ b/lm_eval/tasks/belebele/_default_template_yaml
-group: belebele
 dataset_path: facebook/belebele
 fewshot_config:
  sampler: first_n
 output_type: multiple_choice
 should_decontaminate: true
 doc_to_decontamination_query: "{{question}}"
-doc_to_text: "P: {{flores_passage}}\nQ: {{question.strip()}}\nA: {{mc_answer1}}\nB: {{mc_answer2}}\nC: {{mc_answer3}}\nD: {{mc_answer4}}\nAnswer："
+doc_to_text: "P: {{flores_passage}}\nQ: {{question.strip()}}\nA: {{mc_answer1}}\nB: {{mc_answer2}}\nC: {{mc_answer3}}\nD: {{mc_answer4}}\nAnswer:"
 doc_to_choice: ["A", "B", "C", "D"]
 doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
 metric_list:

--- a/lm_eval/tasks/belebele/_generate_configs.py
+++ b/lm_eval/tasks/belebele/_generate_configs.py
@@ -65,3 +65,36 @@ if __name__ == "__main__":
                allow_unicode=True,
                default_style='"',
            )
+
+    # write out the group-level config, which lists the per-language tasks and how their metrics are aggregated
+
+    group_yaml_dict = {
+        "group": f"belebele_{args.task_prefix}"
+        if args.task_prefix != ""
+        else "belebele",
+        "task": [
+            (
+                f"belebele_{args.task_prefix}_{lang}"
+                if args.task_prefix != ""
+                else f"belebele_{lang}"
+            )
+            for lang in languages
+            if "default" not in lang
+        ],
+        "aggregate_metric_list": [
+            {"metric": "acc", "aggregation": "mean", "weight_by_size": False},
+            {"metric": "acc_norm", "aggregation": "mean", "weight_by_size": False},
+        ],
+        "metadata": {"version": 0.0},
+    }
+
+    file_save_path = "_" + args.save_prefix_path + f"{args.task_prefix}.yaml"
+
+    with open(file_save_path, "w", encoding="utf-8") as group_yaml_file:
+        yaml.dump(
+            group_yaml_dict,
+            group_yaml_file,
+            width=float("inf"),
+            allow_unicode=True,
+            default_style='"',
+        )
--- a/lm_eval/tasks/belebele/belebele_acm_Arab.yaml
+++ b/lm_eval/tasks/belebele/belebele_acm_Arab.yaml
-"fewshot_split": "acm_Arab"
-"include": "_default_template_yaml"
-"task": "belebele_acm_Arab"
-"test_split": "acm_Arab"
+dataset_name: acm_Arab
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_acm_Arab
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_afr_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_afr_Latn.yaml
-"fewshot_split": "afr_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_afr_Latn"
-"test_split": "afr_Latn"
+dataset_name: afr_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_afr_Latn
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_als_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_als_Latn.yaml
-"fewshot_split": "als_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_als_Latn"
-"test_split": "als_Latn"
+dataset_name: als_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_als_Latn
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
+++ b/lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
-"fewshot_split": "amh_Ethi"
-"include": "_default_template_yaml"
-"task": "belebele_amh_Ethi"
-"test_split": "amh_Ethi"
+dataset_name: amh_Ethi
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_amh_Ethi
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_apc_Arab.yaml
+++ b/lm_eval/tasks/belebele/belebele_apc_Arab.yaml
-"fewshot_split": "apc_Arab"
-"include": "_default_template_yaml"
-"task": "belebele_apc_Arab"
-"test_split": "apc_Arab"
+dataset_name: apc_Arab
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_apc_Arab
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_arb_Arab.yaml
+++ b/lm_eval/tasks/belebele/belebele_arb_Arab.yaml
-"fewshot_split": "arb_Arab"
-"include": "_default_template_yaml"
-"task": "belebele_arb_Arab"
-"test_split": "arb_Arab"
+dataset_name: arb_Arab
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_arb_Arab
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_arb_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_arb_Latn.yaml
-"fewshot_split": "arb_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_arb_Latn"
-"test_split": "arb_Latn"
+dataset_name: arb_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_arb_Latn
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_ars_Arab.yaml
+++ b/lm_eval/tasks/belebele/belebele_ars_Arab.yaml
-"fewshot_split": "ars_Arab"
-"include": "_default_template_yaml"
-"task": "belebele_ars_Arab"
-"test_split": "ars_Arab"
+dataset_name: ars_Arab
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_ars_Arab
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_ary_Arab.yaml
+++ b/lm_eval/tasks/belebele/belebele_ary_Arab.yaml
-"fewshot_split": "ary_Arab"
-"include": "_default_template_yaml"
-"task": "belebele_ary_Arab"
-"test_split": "ary_Arab"
+dataset_name: ary_Arab
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_ary_Arab
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_arz_Arab.yaml
+++ b/lm_eval/tasks/belebele/belebele_arz_Arab.yaml
-"fewshot_split": "arz_Arab"
-"include": "_default_template_yaml"
-"task": "belebele_arz_Arab"
-"test_split": "arz_Arab"
+dataset_name: arz_Arab
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_arz_Arab
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_asm_Beng.yaml
+++ b/lm_eval/tasks/belebele/belebele_asm_Beng.yaml
-"fewshot_split": "asm_Beng"
-"include": "_default_template_yaml"
-"task": "belebele_asm_Beng"
-"test_split": "asm_Beng"
+dataset_name: asm_Beng
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_asm_Beng
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_azj_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_azj_Latn.yaml
-"fewshot_split": "azj_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_azj_Latn"
-"test_split": "azj_Latn"
+dataset_name: azj_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_azj_Latn
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_bam_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_bam_Latn.yaml
-"fewshot_split": "bam_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_bam_Latn"
-"test_split": "bam_Latn"
+dataset_name: bam_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_bam_Latn
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_ben_Beng.yaml
+++ b/lm_eval/tasks/belebele/belebele_ben_Beng.yaml
-"fewshot_split": "ben_Beng"
-"include": "_default_template_yaml"
-"task": "belebele_ben_Beng"
-"test_split": "ben_Beng"
+dataset_name: ben_Beng
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_ben_Beng
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_ben_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_ben_Latn.yaml
-"fewshot_split": "ben_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_ben_Latn"
-"test_split": "ben_Latn"
+dataset_name: ben_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_ben_Latn
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
+++ b/lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
-"fewshot_split": "bod_Tibt"
-"include": "_default_template_yaml"
-"task": "belebele_bod_Tibt"
-"test_split": "bod_Tibt"
+dataset_name: bod_Tibt
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_bod_Tibt
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
+++ b/lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
-"fewshot_split": "bul_Cyrl"
-"include": "_default_template_yaml"
-"task": "belebele_bul_Cyrl"
-"test_split": "bul_Cyrl"
+dataset_name: bul_Cyrl
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_bul_Cyrl
+test_split: test
--- a/lm_eval/tasks/belebele/belebele_cat_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_cat_Latn.yaml
-"fewshot_split": "cat_Latn"
-"include": "_default_template_yaml"
-"task": "belebele_cat_Latn"
-"test_split": "cat_Latn"
+dataset_name: cat_Latn
+fewshot_split: test
+include: _default_template_yaml
+task: belebele_cat_Latn
+test_split: test