Merge branch 'main' into feature/eval_from_config

601be343 · Baber · d0884a96 · 68c3a811 · 601be343 · 601be343
Commit 601be343 authored Jun 23, 2025 by Baber
20 changed files
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sna.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sna.yaml
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_translate
+task: afrimmlu_translate_sna_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sot.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sot.yaml
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_translate
+task: afrimmlu_translate_sot_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_swa.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_swa.yaml
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_translate
+task: afrimmlu_translate_swa_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_twi.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_twi.yaml
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_translate
+task: afrimmlu_translate_twi_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_wol.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_wol.yaml
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_translate
+task: afrimmlu_translate_wol_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_xho.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_xho.yaml
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_translate
+task: afrimmlu_translate_xho_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_yor.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_yor.yaml
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_translate
+task: afrimmlu_translate_yor_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_zul.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_zul.yaml
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_translate
+task: afrimmlu_translate_zul_prompt_5
--- a/lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    """Return the answer choices obtained by evaluating ``doc["choices"]``."""
+    # NOTE(review): eval() executes whatever expression the dataset field holds.
+    # Presumably the field is the string form of a list of options -- confirm
+    # against the dataset, and consider ast.literal_eval if it is a plain literal.
+    choices = eval(doc["choices"])
+    return choices
+
+
+def doc_to_text(doc):
+    """Build the multiple-choice prompt text for a single document.
+
+    Reads ``doc["subject"]``, ``doc["question"]`` and ``doc["choices"]`` and
+    returns the formatted prompt, which ends with ``"Answer: "``.
+    """
+    # Prompt template; the placeholders are filled by str.format below.
+    output = """Given your proficiency in {subject}, please answer the subsequent multiple-choice question with 'A', 'B', 'C', or 'D'.
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+
+    # NOTE(review): eval() executes whatever expression the dataset field holds.
+    # Presumably the field is the string form of a list -- confirm against the
+    # dataset, and consider ast.literal_eval if it is a plain literal.
+    choices = eval(doc["choices"])
+    # The first four evaluated choices map to options A-D; fewer than four
+    # entries in a list raises IndexError here.
+    text = output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
+    return text
--- a/lm_eval/tasks/afrimmlu/translate/utils.py
+++ b/lm_eval/tasks/afrimmlu/translate/utils.py
-from lm_eval.utils import weighted_f1_score
-
-
-def doc_to_choice(doc):
-    choices = eval(doc["choices"])
-    return choices
-
-
-def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence
-                model answers multiple-choice questions about '{subject}'
-
-                Question: '''{question}'''
-
-                Choices:
-                        A: ''{choice1}'''
-                        B: ''{choice2}'''
-                        C: ''{choice3}'''
-                        D: ''{choice4}'''
-
-                Answer:  """
-
-    choices = eval(doc["choices"])
-    text = output.format(
-        subject=doc["subject"],
-        question=doc["question"],
-        choice1=choices[0],
-        choice2=choices[1],
-        choice3=choices[2],
-        choice4=choices[3],
-    )
-    return text
--- a/lm_eval/tasks/afrixnli/direct/afrixnli.yaml
+++ b/lm_eval/tasks/afrixnli/direct/afrixnli.yaml
+group: afrixnli-irokobench
+task:
+  - afrixnli_tasks_prompt_1
+  - afrixnli_tasks_prompt_2
+  - afrixnli_tasks_prompt_3
+  - afrixnli_tasks_prompt_4
+  - afrixnli_tasks_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_amh.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_amh.yaml
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_eng.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_eng.yaml
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ewe.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ewe.yaml
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_fra.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_fra.yaml
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_hau.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_hau.yaml
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ibo.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ibo.yaml
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_kin.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_kin.yaml
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lin.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lin.yaml
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_1
--- a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lug.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lug.yaml
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {premise}
+
+  Hypothesis: {hypothesis}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_1