formatting (#2104)

56a4e794 · Lintang Sutawika · GitHub · 9884ad6e · 56a4e794 · 56a4e794
Unverified commit 56a4e794 — authored Jul 15, 2024 by Lintang Sutawika; committed by GitHub on Jul 15, 2024
18 changed files
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
 dataset_name: twi
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_twi
\ No newline at end of file
+task: afrimmlu_translate_twi
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
 dataset_name: wol
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_wol
\ No newline at end of file
+task: afrimmlu_translate_wol
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
 dataset_name: xho
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_xho
\ No newline at end of file
+task: afrimmlu_translate_xho
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
 dataset_name: yor
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_yor
\ No newline at end of file
+task: afrimmlu_translate_yor
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
 dataset_name: zul
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_zul
\ No newline at end of file
+task: afrimmlu_translate_zul
--- a/lm_eval/tasks/afrimmlu/translate/utils.py
+++ b/lm_eval/tasks/afrimmlu/translate/utils.py
@@ -7,9 +7,9 @@ def doc_to_choice(doc):


 def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence 
+    output = """You are a highly knowledgeable and intelligent artificial intelligence
                model answers multiple-choice questions about '{subject}'
-                
+
                Question: '''{question}'''

                Choices:
@@ -17,16 +17,18 @@ def doc_to_text(doc):
                        B: ''{choice2}'''
                        C: ''{choice3}'''
                        D: ''{choice4}'''
-                       
+
                Answer:  """
-    
+
    choices = eval(doc["choices"])
-    text = output.format(subject=doc['subject'],
-                         question=doc['question'],
-                         choice1=choices[0],
-                         choice2=choices[1],
-                         choice3=choices[2],
-                         choice4=choices[3])
+    text = output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
    return text


@@ -35,4 +37,4 @@ def weighted_f1_score(items):
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
-    return fscore
\ No newline at end of file
+    return fscore
--- a/lm_eval/tasks/afrimmlu/utils.py
+++ b/lm_eval/tasks/afrimmlu/utils.py
@@ -7,9 +7,9 @@ def doc_to_choice(doc):


 def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence 
+    output = """You are a highly knowledgeable and intelligent artificial intelligence
                model answers multiple-choice questions about '{subject}'
-                
+
                Question: '''{question}'''

                Choices:
@@ -17,16 +17,18 @@ def doc_to_text(doc):
                        B: ''{choice2}'''
                        C: ''{choice3}'''
                        D: ''{choice4}'''
-                       
+
                Answer:  """
-    
+
    choices = eval(doc["choices"])
-    text = output.format(subject=doc['subject'],
-                         question=doc['question'],
-                         choice1=choices[0],
-                         choice2=choices[1],
-                         choice3=choices[2],
-                         choice4=choices[3])
+    text = output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
    return text


@@ -35,4 +37,4 @@ def weighted_f1_score(items):
    golds = unzipped_list[0]
    preds = unzipped_list[1]
    fscore = f1_score(golds, preds, average="weighted")
-    return fscore
\ No newline at end of file
+    return fscore
--- a/lm_eval/tasks/afrixnli/README.md
+++ b/lm_eval/tasks/afrixnli/README.md
@@ -5,8 +5,8 @@
 IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
 https://arxiv.org/pdf/2406.03368

-IrokoBench is a human-translated benchmark dataset for 16 typologically diverse 
-low-resource African languages covering three tasks: natural language inference (AfriXNLI), 
+IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
+low-resource African languages covering three tasks: natural language inference (AfriXNLI),
 mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).


@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU

 ```
 @misc{adelani2024irokobenchnewbenchmarkafrican,
-      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models}, 
+      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
      author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
      year={2024},
      eprint={2406.03368},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
-      url={https://arxiv.org/abs/2406.03368}, 
+      url={https://arxiv.org/abs/2406.03368},
 }
 ```

@@ -30,7 +30,7 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU

 * `afrixnli`: All afrixnli tasks
 * `afrixnli_en_direct`: afrixnli_en_direct evaluates models performance using the anli prompt on the curated dataset
-* `afrixnli_native_direct`: afrixnli_native_direct evaluates models performance using the anli prompt translated to the 
+* `afrixnli_native_direct`: afrixnli_native_direct evaluates models performance using the anli prompt translated to the
 respective languages on the curated dataset
 * `afrixnli_translate`: afrixnli_translate evaluates models using the anli prompt in translate-test setting
 * `afrixnli_manual_direct`: afrixnli_manual_direct evaluates models performance using Lai's prompt on the curated dataset

--- a/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py
+++ b/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py
@@ -2,11 +2,7 @@ from sklearn.metrics import f1_score


 def doc_to_target(doc):
-    replacements = {
-        0: 'True',
-        1: 'Neither',
-        2: 'False'
-    }
+    replacements = {0: "True", 1: "Neither", 2: "False"}
    return replacements[doc["label"]]



--- a/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml
+++ b/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml
@@ -2,4 +2,3 @@
 dataset_name: amh
 include: afrixnli_translate_yaml
 task: afrixnli_translate_amh
-
--- a/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py
+++ b/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py
@@ -2,11 +2,7 @@ from sklearn.metrics import f1_score


 def doc_to_target(doc):
-    replacements = {
-        0: 'True',
-        1: 'Neither',
-        2: 'False'
-    }
+    replacements = {0: "True", 1: "Neither", 2: "False"}
    return replacements[doc["label"]]



--- a/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py
+++ b/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py
@@ -2,25 +2,20 @@ from sklearn.metrics import f1_score


 def doc_to_text(doc):
-    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise 
+    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
-    
+
    Premise: {premise}
    Hypothesis: {hypothesis}
-    
+
    Is it entailment, contradiction, or neutral?"""

-    text = output.format(premise=doc['premise'],
-                         hypothesis=doc['hypothesis'])
+    text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
    return text


 def doc_to_target(doc):
-    replacements = {
-        0: 'entailment',
-        1: 'neutral',
-        2: 'contradiction'
-    }
+    replacements = {0: "entailment", 1: "neutral", 2: "contradiction"}
    return replacements[doc["label"]]



--- a/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py
+++ b/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py
@@ -2,25 +2,20 @@ from sklearn.metrics import f1_score


 def doc_to_text(doc):
-    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise 
+    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
-    
+
    Premise: {premise}
    Hypothesis: {hypothesis}
-    
+
    Is it entailment, contradiction, or neutral?"""

-    text = output.format(premise=doc['premise'],
-                         hypothesis=doc['hypothesis'])
+    text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
    return text


 def doc_to_target(doc):
-    replacements = {
-        0: 'entailment',
-        1: 'neutral',
-        2: 'contradiction'
-    }
+    replacements = {0: "entailment", 1: "neutral", 2: "contradiction"}
    return replacements[doc["label"]]



--- a/lm_eval/tasks/afrixnli/utils.py
+++ b/lm_eval/tasks/afrixnli/utils.py
-import yaml
 import argparse

+import yaml
+

 class FunctionTag:
    def __init__(self, value):
@@ -12,110 +13,110 @@ LANGUAGES = {
        "QUESTION_WORD": "ትክክል",
        "ENTAILMENT_LABEL": "አዎ",
        "NEUTRAL_LABEL": "እንዲሁም",
-        "CONTRADICTION_LABEL": "አይ"
+        "CONTRADICTION_LABEL": "አይ",
    },
    "eng": {
        "QUESTION_WORD": "Right",
        "ENTAILMENT_LABEL": "Yes",
        "NEUTRAL_LABEL": "Also",
-        "CONTRADICTION_LABEL": "No"
+        "CONTRADICTION_LABEL": "No",
    },
    "ewe": {
        "QUESTION_WORD": "Esɔ gbe",
        "ENTAILMENT_LABEL": "Ɛ̃",
        "NEUTRAL_LABEL": "Hã",
-        "CONTRADICTION_LABEL": "Ao"
+        "CONTRADICTION_LABEL": "Ao",
    },
    "fra": {
        "QUESTION_WORD": "correct",
        "ENTAILMENT_LABEL": "Oui",
        "NEUTRAL_LABEL": "Aussi",
-        "CONTRADICTION_LABEL": "Non"
+        "CONTRADICTION_LABEL": "Non",
    },
    "hau": {
        "QUESTION_WORD": "Daidai",
        "ENTAILMENT_LABEL": "Ee",
        "NEUTRAL_LABEL": "Haka kuma",
-        "CONTRADICTION_LABEL": "A'a"
+        "CONTRADICTION_LABEL": "A'a",
    },
    "ibo": {
        "QUESTION_WORD": "Ziri ezi",
        "ENTAILMENT_LABEL": "Éè",
        "NEUTRAL_LABEL": "Ọzọkwa",
-        "CONTRADICTION_LABEL": "Mba"
+        "CONTRADICTION_LABEL": "Mba",
    },
    "kin": {
        "QUESTION_WORD": "Nibyo",
        "ENTAILMENT_LABEL": "Yego",
        "NEUTRAL_LABEL": "Na none",
-        "CONTRADICTION_LABEL": "Oya"
+        "CONTRADICTION_LABEL": "Oya",
    },
    "lin": {
        "QUESTION_WORD": "Malamu",
        "ENTAILMENT_LABEL": "Iyo",
        "NEUTRAL_LABEL": "Lisusu",
-        "CONTRADICTION_LABEL": "Te"
+        "CONTRADICTION_LABEL": "Te",
    },
    "lug": {
        "QUESTION_WORD": "Kituufu",
        "ENTAILMENT_LABEL": "Yee",
        "NEUTRAL_LABEL": "N’ekirala",
-        "CONTRADICTION_LABEL": "Nedda"
+        "CONTRADICTION_LABEL": "Nedda",
    },
    "orm": {
        "QUESTION_WORD": "Sirrii",
        "ENTAILMENT_LABEL": "Eeyyee",
        "NEUTRAL_LABEL": "Akkasumas",
-        "CONTRADICTION_LABEL": "Lakki"
+        "CONTRADICTION_LABEL": "Lakki",
    },
    "sna": {
        "QUESTION_WORD": "Chokwadi",
        "ENTAILMENT_LABEL": "Hongu",
        "NEUTRAL_LABEL": "Uye",
-        "CONTRADICTION_LABEL": "Kwete"
+        "CONTRADICTION_LABEL": "Kwete",
    },
    "sot": {
        "QUESTION_WORD": "Nepile",
        "ENTAILMENT_LABEL": "E",
        "NEUTRAL_LABEL": "Hape",
-        "CONTRADICTION_LABEL": "Tjhe"
+        "CONTRADICTION_LABEL": "Tjhe",
    },
    "swa": {
        "QUESTION_WORD": "Sahihi",
        "ENTAILMENT_LABEL": "Ndiyo",
        "NEUTRAL_LABEL": "Pia",
-        "CONTRADICTION_LABEL": "Hapana"
+        "CONTRADICTION_LABEL": "Hapana",
    },
    "twi": {
        "QUESTION_WORD": "Nifa",
        "ENTAILMENT_LABEL": "Aane",
        "NEUTRAL_LABEL": "Anaasɛ",
-        "CONTRADICTION_LABEL": "Daabi"
+        "CONTRADICTION_LABEL": "Daabi",
    },
    "wol": {
        "QUESTION_WORD": "Dëgg",
        "ENTAILMENT_LABEL": "Waaw",
        "NEUTRAL_LABEL": "Itam",
-        "CONTRADICTION_LABEL": "Déet"
+        "CONTRADICTION_LABEL": "Déet",
    },
    "xho": {
        "QUESTION_WORD": "Ichanekile",
        "ENTAILMENT_LABEL": "Ewe",
        "NEUTRAL_LABEL": "Kananjalo",
-        "CONTRADICTION_LABEL": "Hayi"
+        "CONTRADICTION_LABEL": "Hayi",
    },
    "yor": {
        "QUESTION_WORD": "Òótọ́",
        "ENTAILMENT_LABEL": "Bẹ́ẹ̀ni",
        "NEUTRAL_LABEL": "Àti pé",
-        "CONTRADICTION_LABEL": "Rárá"
+        "CONTRADICTION_LABEL": "Rárá",
    },
    "zul": {
        "QUESTION_WORD": "Kulungile",
        "ENTAILMENT_LABEL": "Yebo",
        "NEUTRAL_LABEL": "Futhi",
-        "CONTRADICTION_LABEL": "Cha"
-    }
+        "CONTRADICTION_LABEL": "Cha",
+    },
 }


@@ -127,8 +128,26 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
    :param overwrite: Whether to overwrite files if they already exist.
    """
    err = []
-    languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'wol', 'ewe', 'lin', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm',
-                 'yor', 'hau', 'sot', 'swa']
+    languages = [
+        "eng",
+        "amh",
+        "ibo",
+        "fra",
+        "sna",
+        "wol",
+        "ewe",
+        "lin",
+        "lug",
+        "xho",
+        "kin",
+        "twi",
+        "zul",
+        "orm",
+        "yor",
+        "hau",
+        "sot",
+        "swa",
+    ]
    for lang in languages:
        try:
            if mode == "native-direct":
@@ -141,7 +160,9 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                task_name = f"afrixnli_native_direct_{lang}"
                yaml_template = "afrixnli_native_direct_yaml"
                with open(
-                        f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+                    f"{output_dir}/{file_name}",
+                    "w" if overwrite else "x",
+                    encoding="utf8",
                ) as f:
                    f.write("# Generated by utils.py\n")
                    yaml.dump(
@@ -150,10 +171,10 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                            "task": task_name,
                            "dataset_name": lang,
                            "doc_to_choice": f"{{{{["
-                                           f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
-                                           f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
-                                           f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
-                                           f"]}}}}",
+                            f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
+                            f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
+                            f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
+                            f"]}}}}",
                        },
                        f,
                        allow_unicode=True,
@@ -163,14 +184,16 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                task_name = f"afrixnli_{mode}_{lang}"
                yaml_template = f"afrixnli_{mode}_yaml"
                with open(
-                        f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+                    f"{output_dir}/{file_name}",
+                    "w" if overwrite else "x",
+                    encoding="utf8",
                ) as f:
                    f.write("# Generated by utils.py\n")
                    yaml.dump(
                        {
                            "include": yaml_template,
                            "task": task_name,
-                            "dataset_name": lang
+                            "dataset_name": lang,
                        },
                        f,
                        allow_unicode=True,
@@ -195,7 +218,9 @@ def main() -> None:
        help="Overwrite files if they already exist",
    )
    parser.add_argument(
-        "--output-dir", default="./manual/translate", help="Directory to write yaml files to"
+        "--output-dir",
+        default="./manual/translate",
+        help="Directory to write yaml files to",
    )
    parser.add_argument(
        "--mode",

--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml
@@ -3,4 +3,4 @@ task:
  - med_concepts_qa_atc_tasks
 aggregate_metric_list:
  - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml
@@ -3,4 +3,4 @@ task:
  - med_concepts_qa_icd10proc_tasks
 aggregate_metric_list:
  - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml
@@ -3,4 +3,4 @@ task:
  - med_concepts_qa_icd9cm_tasks
 aggregate_metric_list:
  - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml
@@ -3,4 +3,4 @@ task:
  - med_concepts_qa_icd9proc_tasks
 aggregate_metric_list:
  - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean