add manual xnli prompt, add multichoice for openai models, and adapt...

add manual xnli prompt, add multichoice for openai models, and adapt multichoice metric for openai model

add manual xnli prompt, add multichoice for openai models, and adapt...
add manual xnli prompt, add multichoice for openai models, and adapt multichoice metric for openai model
753e8670 · JessicaOjo · f720ce81 · 753e8670 · 753e8670 · 753e8670
Commit 753e8670 authored May 22, 2024 by JessicaOjo
7 changed files
--- a/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_wol.yaml
+++ b/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_wol.yaml
+# Generated by utils.py
+dataset_name: wol
+include: afrixnli_manual_translate_yaml
+task: afrixnli_manual_translate_wol
--- a/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_xho.yaml
+++ b/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_xho.yaml
+# Generated by utils.py
+dataset_name: xho
+include: afrixnli_manual_translate_yaml
+task: afrixnli_manual_translate_xho
--- a/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_yaml
+++ b/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_yaml
+# Shared template for the manual-prompt AfriXNLI translate-test tasks.
+# The per-language afrixnli_manual_translate_<lang>.yaml files include this
+# file and set their own `dataset_name` and `task`.
+group:
+    - xnli
+    - afrixnli
+    - afrixnli-manual
+    - afrixnli-translate-test
+dataset_path: masakhane/afrixnli-translate-test
+# Placeholder; each per-language file supplies the language code.
+dataset_name: null
+output_type: multiple_choice_gpt
+test_split: test
+# Prompt and target builders; presumably resolved against the utils.py that
+# sits next to this template (verify against the task loader).
+doc_to_text: !function utils.doc_to_text
+doc_to_target: !function utils.doc_to_target
+# Choice order follows the label ids 0, 1, 2 mapped in utils.doc_to_target.
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  # NOTE(review): `True` here and `true` in the acc entry are spelled
+  # differently; presumably both load as boolean true, but consider using one
+  # spelling throughout.
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: acc_gpt
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
--- a/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_yor.yaml
+++ b/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_yor.yaml
+# Generated by utils.py
+dataset_name: yor
+include: afrixnli_manual_translate_yaml
+task: afrixnli_manual_translate_yor
--- a/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_zul.yaml
+++ b/lm_eval/tasks/afrixnli/manual/translate/afrixnli_manual_translate_zul.yaml
+# Generated by utils.py
+dataset_name: zul
+include: afrixnli_manual_translate_yaml
+task: afrixnli_manual_translate_zul
--- a/lm_eval/tasks/afrixnli/manual/translate/utils.py
+++ b/lm_eval/tasks/afrixnli/manual/translate/utils.py
+from sklearn.metrics import f1_score
+def doc_to_text(doc):
+    """Build the English NLI prompt for one example.
+
+    Args:
+        doc: Mapping that provides the 'premise' and 'hypothesis' fields.
+
+    Returns:
+        The prompt template with both fields substituted via str.format.
+    """
+    # NOTE(review): the continuation lines of this triple-quoted literal are
+    # indented in the source, so that leading whitespace (and the trailing
+    # space after "premise" on the first line) is part of the prompt text.
+    # Confirm this is intended before normalising it, because any change to
+    # the literal changes the prompt the model receives.
+    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise 
+    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
+    Premise: {premise}
+    Hypothesis: {hypothesis}
+    Is it entailment, contradiction, or neutral?"""
+    text = output.format(premise=doc['premise'],
+                         hypothesis=doc['hypothesis'])
+    return text
+def doc_to_target(doc):
+    """Return the label name for one example.
+
+    Looks up doc["label"] (0, 1 or 2) in a fixed table; any other value
+    raises KeyError.
+    """
+    # The ids map to names in the same order as the doc_to_choice list in
+    # afrixnli_manual_translate_yaml.
+    replacements = {
+        0: 'entailment',
+        1: 'neutral',
+        2: 'contradiction'
+    }
+    return replacements[doc["label"]]
+def weighted_f1_score(items):
+    """Aggregate per-example results into a single weighted F1 score.
+
+    Args:
+        items: Iterable of sequences; the first element of each is treated
+            as the gold label and the second as the prediction.
+
+    Returns:
+        The result of sklearn's f1_score(golds, preds, average="weighted").
+    """
+    # zip(*items) transposes the per-example pairs into parallel tuples.
+    # With no items the list is empty and the indexing below raises
+    # IndexError.
+    unzipped_list = list(zip(*items))
+    golds = unzipped_list[0]
+    preds = unzipped_list[1]
+    fscore = f1_score(golds, preds, average="weighted")
+    return fscore
--- a/lm_eval/tasks/afrixnli/utils.py
+++ b/lm_eval/tasks/afrixnli/utils.py
@@ -131,24 +131,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                 'yor', 'hau', 'sot', 'swa']
    for lang in languages:
        try:
-            if mode == "en_direct":
+            if mode == "native-direct":
-                file_name = f"afrixnli_en_direct_{lang}.yaml"
-                task_name = f"afrixnli_en_direct_{lang}"
-                yaml_template = "afrixnli_en_direct_yaml"
-                with open(
-                        f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
-                ) as f:
-                    f.write("# Generated by utils.py\n")
-                    yaml.dump(
-                        {
-                            "include": yaml_template,
-                            "task": task_name,
-                            "dataset_name": lang
-                        },
-                        f,
-                        allow_unicode=True,
-                    )
-            elif mode == "native-direct":
                QUESTION_WORD = LANGUAGES[lang]["QUESTION_WORD"]
                ENTAILMENT_LABEL = LANGUAGES[lang]["ENTAILMENT_LABEL"]
                NEUTRAL_LABEL = LANGUAGES[lang]["NEUTRAL_LABEL"]
@@ -175,6 +158,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                        f,
                        allow_unicode=True,
                    )
+            else:
+                file_name = f"afrixnli_{mode}_{lang}.yaml"
+                task_name = f"afrixnli_{mode}_{lang}"
+                yaml_template = f"afrixnli_{mode}_yaml"
+                with open(
+                        f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+                ) as f:
+                    f.write("# Generated by utils.py\n")
+                    yaml.dump(
+                        {
+                            "include": yaml_template,
+                            "task": task_name,
+                            "dataset_name": lang
+                        },
+                        f,
+                        allow_unicode=True,
+                    )
        except FileExistsError:
            err.append(file_name)
@@ -195,12 +195,12 @@ def main() -> None:
        help="Overwrite files if they already exist",
    )
    parser.add_argument(
-        "--output-dir", default="./native-direct", help="Directory to write yaml files to"
+        "--output-dir", default="./manual/translate", help="Directory to write yaml files to"
    )
    parser.add_argument(
        "--mode",
-        default="native-direct",
+        default="manual_translate",
-        choices=["en_direct", "native-direct"],
+        choices=["en_direct", "native-direct", "manual_direct", "manual_translate"],
        help="Mode of chain-of-thought",
    )
    args = parser.parse_args()