"test/vscode:/vscode.git/clone" did not exist on "8fcc69e7c473318b54ae33e5b1f805baa654589f"
Commit b84c8c9e authored by JessicaOjo's avatar JessicaOjo
Browse files

update afrixnli tasks

parent bcee8f2e
# Generated by utils.py
dataset_name: ewe
doc_to_choice: '{{[premise+", Esɔ gbe? Ɛ̃, "+hypothesis,premise+", Esɔ gbe? Hã, "+hypothesis,premise+",
Esɔ gbe? Ao, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_ewe
# Generated by utils.py
dataset_name: fra
doc_to_choice: '{{[premise+", correct? Oui, "+hypothesis,premise+", correct? Aussi,
"+hypothesis,premise+", correct? Non, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_fra
# Generated by utils.py
dataset_name: hau
doc_to_choice: '{{[premise+", Daidai? Ee, "+hypothesis,premise+", Daidai? Haka kuma,
"+hypothesis,premise+", Daidai? A''a, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_hau
# Generated by utils.py
dataset_name: ibo
doc_to_choice: '{{[premise+", Nke ọma? Ee, "+hypothesis,premise+", Nke ọma? Nakwa,
"+hypothesis,premise+", Nke ọma? Mba, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_ibo
# Generated by utils.py
dataset_name: kin
doc_to_choice: '{{[premise+", Nibyo? Yego, "+hypothesis,premise+", Nibyo? Na none,
"+hypothesis,premise+", Nibyo? Oya, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_kin
# Generated by utils.py
dataset_name: lug
doc_to_choice: '{{[premise+", Kituufu? Yee, "+hypothesis,premise+", Kituufu? N’ekirala,
"+hypothesis,premise+", Kituufu? Nedda, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_lug
# Generated by utils.py
dataset_name: orm
doc_to_choice: '{{[premise+", Sirrii? Eeyyee, "+hypothesis,premise+", Sirrii? Akkasumas,
"+hypothesis,premise+", Sirrii? Lakki, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_orm
# Generated by utils.py
dataset_name: sna
doc_to_choice: '{{[premise+", Chokwadi? Hongu, "+hypothesis,premise+", Chokwadi? Uye,
"+hypothesis,premise+", Chokwadi? Kwete, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_sna
# Generated by utils.py
dataset_name: sot
doc_to_choice: '{{[premise+", Nepile? E, "+hypothesis,premise+", Nepile? Hape, "+hypothesis,premise+",
Nepile? Tjhe, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_sot
# Generated by utils.py
dataset_name: swa
doc_to_choice: '{{[premise+", Sahihi? Ndiyo, "+hypothesis,premise+", Sahihi? Pia,
"+hypothesis,premise+", Sahihi? Hapana, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_swa
# Generated by utils.py
dataset_name: twi
doc_to_choice: '{{[premise+", Nifa? Aane, "+hypothesis,premise+", Nifa? Anaasɛ, "+hypothesis,premise+",
Nifa? Daabi, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_twi
# Generated by utils.py
dataset_name: wol
doc_to_choice: '{{[premise+", Dëgg? Waaw, "+hypothesis,premise+", Dëgg? Itam, "+hypothesis,premise+",
Dëgg? Déet, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_wol
# Generated by utils.py
dataset_name: xho
doc_to_choice: '{{[premise+", Kunene? Ewe, "+hypothesis,premise+", Kunene? Kananjalo,
"+hypothesis,premise+", Kunene? Hayi, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_xho
group: group:
- xnli - xnli
- afrixnli - afrixnli
task: null
dataset_path: masakhane/afrixnli dataset_path: masakhane/afrixnli
dataset_name: null dataset_name: null
output_type: generate_until output_type: multiple_choice
validation_split: validation validation_split: validation
test_split: test test_split: test
fewshot_split: validation fewshot_split: validation
doc_to_text: null
doc_to_target: label doc_to_target: label
doc_to_choice: null doc_to_text: ""
metric_list: metric_list:
- metric: f1 - metric: f1
aggregation: f1
average: weighted
higher_is_better: True
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- ","
- "\\$"
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
......
# Generated by utils.py
dataset_name: yor
doc_to_choice: '{{[premise+", Òótọ́? Bẹ́ẹ̀ni, "+hypothesis,premise+", Òótọ́? Bákan
náà, "+hypothesis,premise+", Òótọ́? Rárá, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_yor
# Generated by utils.py
dataset_name: zul
doc_to_choice: '{{[premise+", Kulungile? Yebo, "+hypothesis,premise+", Kulungile?
Futhi, "+hypothesis,premise+", Kulungile? Cha, "+hypothesis]}}'
include: afrixnli_native_direct_yaml
task: afrixnli_native_direct_zul
#!/bin/bash
# Evaluate a list of HF models on the AfriXNLI native-direct tasks with
# lm_eval, sweeping few-shot settings 0/2/4/6/8. Results land under
# results/<model-basename>/<num_fewshot>/.

models=(
    "masakhane/African-ultrachat-alpaca"
    "masakhane/zephyr-7b-gemma-sft-african-alpaca"
    "masakhane/zephyr-7b-gemma-sft-african-ultrachat-5k"
    "google/flan-t5-xxl"
    "bigscience/mt0-xxl-mt"
    "CohereForAI/aya-101"
    "bigscience/bloomz-7b1-mt"
    "meta-llama/Llama-2-7b-chat-hf"
    "meta-llama/Meta-Llama-3-8B-Instruct"
    "meta-llama/Meta-Llama-3-70B-Instruct"
    "google/gemma-1.1-7b-it"
    "RWKV/v5-EagleX-v2-7B-HF"
    "RWKV/rwkv-6-world-7b"
)

# Comma-separated task list consumed verbatim by lm_eval's --tasks flag.
task=afrixnli_native_direct_amh,afrixnli_native_direct_eng,afrixnli_native_direct_ewe,afrixnli_native_direct_fra,afrixnli_native_direct_hau,afrixnli_native_direct_ibo,afrixnli_native_direct_kin,afrixnli_native_direct_lug,afrixnli_native_direct_orm,afrixnli_native_direct_sna,afrixnli_native_direct_sot,afrixnli_native_direct_swa,afrixnli_native_direct_twi,afrixnli_native_direct_wol,afrixnli_native_direct_xho,afrixnli_native_direct_yor,afrixnli_native_direct_zul

for model in "${models[@]}"
do
    echo "Evaluating model: $model"
    for fewshot in 0 2 4 6 8
    do
        # ${model##*/} strips the org prefix, e.g. "google/flan-t5-xxl" -> "flan-t5-xxl".
        export OUTPUT_DIR=results/${model##*/}/$fewshot
        mkdir -p "$OUTPUT_DIR"
        # Quote expansions so an accidental space in $task/$fewshot cannot
        # silently split into extra CLI arguments; the original also had no
        # space before the continuation backslash after $task.
        # NOTE(review): --limit 1 evaluates a single example per task —
        # looks like a debugging leftover; remove it for full runs.
        lm_eval --model hf \
            --model_args "pretrained=${model}" \
            --tasks "$task" \
            --device cuda:0 \
            --batch_size 16 \
            --output_path "$OUTPUT_DIR" \
            --num_fewshot "$fewshot" \
            --limit 1 \
            --verbosity DEBUG
    done
done
\ No newline at end of file
import re
def wikitext_detokenizer(doc):
    """Normalise a generated answer in ``doc["label"]`` to a canonical NLI label.

    Strips '.'/',' punctuation, lowercases, truncates at the first literal
    "\\n\\n" marker, removes generation wrappers (<pad>/</s>), then maps the
    remaining text to neutral/entailment/contradiction via ``extract_answer``
    and ``verbalizer``.

    :param doc: dataset record; only the "label" field is read.
    :return: canonical label string, or the cleaned text when no label matches.
    """
    string = doc["label"]
    # Bug fix: str.replace('[.,]', '') removed the literal substring "[.,]",
    # not the characters '.' and ','; a regex character class was intended.
    string = re.sub(r"[.,]", "", string).lower()
    # Bug fix: the original rebound `string` to the *list* returned by
    # split(), so the following .split("<pad>") raised AttributeError.
    # Keep the text before the first literal "\n\n" marker — presumably the
    # answer precedes any continuation; TODO confirm against model outputs.
    string = string.split("\\n\\n")[0]
    string = string.split("<pad>")[-1].split("</s>")[0].strip()
    string = extract_answer(string)
    string = verbalizer(string.strip())
    return string
def extract_answer(string):
    """Pull an NLI verdict (neutral/entailment/contradiction) out of free text.

    Searches case-insensitively for a known lead-in phrase ("the answer is",
    "label:", "therefore", a bare "is", ...) immediately followed by one of
    the three labels, and returns the label as it appeared in the text.

    :param string: raw model output.
    :return: the matched label (original casing) or ``string`` unchanged
             when no lead-in + label pair is found.
    """
    # Fixed a duplicated `\*\*|\*\*` alternative in the original pattern;
    # alternation order matters, so the remaining branches are untouched.
    pattern = r'(\*\*answer:\*\*|\*answer is:\*|\*\*|\*answer is exact\*|label:|the premise and hypothesis ' \
              r'are|the premise and the hypothesis is|the premise and the hypothesis is a|described as|therefore they ' \
              r'are|therefore|are considered|is an exact|it is|is a|is)\s*(neutral|entailment|contradiction)'
    match = re.search(pattern, string, re.IGNORECASE)
    # group(2) is the label itself; fall back to the input when nothing matched.
    return match.group(2) if match else string
def verbalizer(string):
    """Map a free-form answer onto one of the three canonical NLI labels.

    Each label owns a list of surface variants (including common model
    misspellings such as "entitlement"); the first label, in declaration
    order, with a variant occurring as a substring of ``string`` wins.

    :param string: cleaned model output.
    :return: "entailment"/"contradiction"/"neutral", or ``string`` unchanged
             when no variant matches.
    """
    verbalizer_dict = {
        "entailment": ['encouragement', 'entitlement', 'entails', 'entailed', 'entailment'],
        "contradiction": ['contradictory', 'contradicts', 'contradiction'],
        "neutral": ['neutral']}
    for label, variants in verbalizer_dict.items():
        if any(variant in string for variant in variants):
            return label
    return string
...@@ -7,14 +7,113 @@ class FunctionTag: ...@@ -7,14 +7,113 @@ class FunctionTag:
self.value = value self.value = value
def function_representer(dumper, data): LANGUAGES = {
return dumper.represent_scalar('!function', data.value, style='') "amh": {
"QUESTION_WORD": "ትክክል",
"ENTAILMENT_LABEL": "አዎ",
yaml.add_representer(FunctionTag, function_representer) "NEUTRAL_LABEL": "እንዲሁም",
"CONTRADICTION_LABEL": "አይ"
},
"eng": {
"QUESTION_WORD": "Right",
"ENTAILMENT_LABEL": "Yes",
"NEUTRAL_LABEL": "Also",
"CONTRADICTION_LABEL": "No"
},
"ewe": {
"QUESTION_WORD": "Esɔ gbe",
"ENTAILMENT_LABEL": "Ɛ̃",
"NEUTRAL_LABEL": "Hã",
"CONTRADICTION_LABEL": "Ao"
},
"fra": {
"QUESTION_WORD": "correct",
"ENTAILMENT_LABEL": "Oui",
"NEUTRAL_LABEL": "Aussi",
"CONTRADICTION_LABEL": "Non"
},
"hau": {
"QUESTION_WORD": "Daidai",
"ENTAILMENT_LABEL": "Ee",
"NEUTRAL_LABEL": "Haka kuma",
"CONTRADICTION_LABEL": "A'a"
},
"ibo": {
"QUESTION_WORD": "Nke ọma",
"ENTAILMENT_LABEL": "Ee",
"NEUTRAL_LABEL": "Nakwa",
"CONTRADICTION_LABEL": "Mba"
},
"kin": {
"QUESTION_WORD": "Nibyo",
"ENTAILMENT_LABEL": "Yego",
"NEUTRAL_LABEL": "Na none",
"CONTRADICTION_LABEL": "Oya"
},
"lug": {
"QUESTION_WORD": "Kituufu",
"ENTAILMENT_LABEL": "Yee",
"NEUTRAL_LABEL": "N’ekirala",
"CONTRADICTION_LABEL": "Nedda"
},
"orm": {
"QUESTION_WORD": "Sirrii",
"ENTAILMENT_LABEL": "Eeyyee",
"NEUTRAL_LABEL": "Akkasumas",
"CONTRADICTION_LABEL": "Lakki"
},
"sna": {
"QUESTION_WORD": "Chokwadi",
"ENTAILMENT_LABEL": "Hongu",
"NEUTRAL_LABEL": "Uye",
"CONTRADICTION_LABEL": "Kwete"
},
"sot": {
"QUESTION_WORD": "Nepile",
"ENTAILMENT_LABEL": "E",
"NEUTRAL_LABEL": "Hape",
"CONTRADICTION_LABEL": "Tjhe"
},
"swa": {
"QUESTION_WORD": "Sahihi",
"ENTAILMENT_LABEL": "Ndiyo",
"NEUTRAL_LABEL": "Pia",
"CONTRADICTION_LABEL": "Hapana"
},
"twi": {
"QUESTION_WORD": "Nifa",
"ENTAILMENT_LABEL": "Aane",
"NEUTRAL_LABEL": "Anaasɛ",
"CONTRADICTION_LABEL": "Daabi"
},
"wol": {
"QUESTION_WORD": "Dëgg",
"ENTAILMENT_LABEL": "Waaw",
"NEUTRAL_LABEL": "Itam",
"CONTRADICTION_LABEL": "Déet"
},
"xho": {
"QUESTION_WORD": "Kunene",
"ENTAILMENT_LABEL": "Ewe",
"NEUTRAL_LABEL": "Kananjalo",
"CONTRADICTION_LABEL": "Hayi"
},
"yor": {
"QUESTION_WORD": "Òótọ́",
"ENTAILMENT_LABEL": "Bẹ́ẹ̀ni",
"NEUTRAL_LABEL": "Bákan náà",
"CONTRADICTION_LABEL": "Rárá"
},
"zul": {
"QUESTION_WORD": "Kulungile",
"ENTAILMENT_LABEL": "Yebo",
"NEUTRAL_LABEL": "Futhi",
"CONTRADICTION_LABEL": "Cha"
}
}
def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
""" """
Generate a yaml file for each language. Generate a yaml file for each language.
...@@ -22,23 +121,50 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: ...@@ -22,23 +121,50 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
:param overwrite: Whether to overwrite files if they already exist. :param overwrite: Whether to overwrite files if they already exist.
""" """
err = [] err = []
languages = ['amh', 'ibo', 'fra', 'sna', 'lin', 'wol', 'ewe', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm', languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'wol', 'ewe', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm',
'yor', 'hau', 'sot', 'swa'] 'yor', 'hau', 'sot', 'swa']
for lang in languages: for lang in languages:
file_name = f"afrixnli_{lang}.yaml"
try: try:
if mode == "en_direct":
file_name = f"afrixnli_en_direct_{lang}.yaml"
task_name = f"afrixnli_en_direct_{lang}"
yaml_template = "afrixnli_en_direct_yaml"
with open( with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8" f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f: ) as f:
f.write("# Generated by utils.py\n") f.write("# Generated by utils.py\n")
yaml.dump( yaml.dump(
{ {
"include": "afrixnli_common_yaml", "include": yaml_template,
"task": f"afrixnli_{lang}", "task": task_name,
"dataset_name": lang
},
f,
allow_unicode=True,
)
elif mode == "native-direct":
QUESTION_WORD = LANGUAGES[lang]["QUESTION_WORD"]
ENTAILMENT_LABEL = LANGUAGES[lang]["ENTAILMENT_LABEL"]
NEUTRAL_LABEL = LANGUAGES[lang]["NEUTRAL_LABEL"]
CONTRADICTION_LABEL = LANGUAGES[lang]["CONTRADICTION_LABEL"]
file_name = f"afrixnli_native_direct_{lang}.yaml"
task_name = f"afrixnli_native_direct_{lang}"
yaml_template = "afrixnli_native_direct_yaml"
with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": yaml_template,
"task": task_name,
"dataset_name": lang, "dataset_name": lang,
"doc_to_target": "{{label}}", #FunctionTag('preprocess_wikitext.wikitext_detokenizer'), "doc_to_choice": f"{{{{["
"doc_to_text": "Premise: {{premise}} \nHypothesis: {{hypothesis}} \nIs it entailment, " f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
"contradiction, or neutral?" f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
f"]}}}}",
}, },
f, f,
allow_unicode=True, allow_unicode=True,
...@@ -63,11 +189,17 @@ def main() -> None: ...@@ -63,11 +189,17 @@ def main() -> None:
help="Overwrite files if they already exist", help="Overwrite files if they already exist",
) )
parser.add_argument( parser.add_argument(
"--output-dir", default=".", help="Directory to write yaml files to" "--output-dir", default="./native-direct", help="Directory to write yaml files to"
)
parser.add_argument(
"--mode",
default="native-direct",
choices=["en_direct", "native-direct"],
help="Mode of chain-of-thought",
) )
args = parser.parse_args() args = parser.parse_args()
gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite) gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
if __name__ == "__main__": if __name__ == "__main__":
......
!!@@##@@!! -- Example 0
እንደ ተግዳሮቶች በአሁኑ ወቅት ወደ ሁለት ተከታታይ ትናንሽ ቅርጾች ሊቆረጥ የሚችል ቃል እየፈለግሁ ነው።
Question: ቀላል ጊዜ አልነበረኝም ለሁለት ተከታታይ ትናንሽ ቅርጾች ሊቆራረጥ የሚችል ቃል ለማግኘት ። True, False, or Neither?
Answer: True
እንግዲህ , እኔ ስለ እሱ እያሰብኩኝ አልነበረም , ነገር ግን እኔ በጣም ተስፋ ቆርጨ ነበር, እና, እኔ እንደገና ከእርሱ ጋር ማውራት ጀመርኩ።
Question: እኔ ከእርሱ ጋር ድጋሜ አልተነጋገርኩም። True, False, or Neither?
Answer:
!!@@##@@!! -- Example 1
ስጦታህ ለ85ኛ ዓመት ክብረ በዓላችን አስፈላጊ ነው።
Question: ያገኘነው ስጦታ ሁሉ እንደ አንተ ስጦታ አስፈላጊ አይደለም። True, False, or Neither?
Answer: Neither
እንግዲህ , እኔ ስለ እሱ እያሰብኩኝ አልነበረም , ነገር ግን እኔ በጣም ተስፋ ቆርጨ ነበር, እና, እኔ እንደገና ከእርሱ ጋር ማውራት ጀመርኩ።
Question: እኔ በጣም ስለተበሳጨሁ እንደገና እሱን ማነጋገር ጀመርኩ። True, False, or Neither?
Answer:
!!@@##@@!! -- Example 2
በንዑስ ክፍል አቀራረብ ውስጥ መሰረታዊ እና የሥራ-ድርሻ ምድብ እያንዳንዳቸው አማካይ መጠናቸውን ለማግኘት ከዋጋው በላይ መቶኛ ምልክት ይሰጣቸዋል።
Question: ዋጋው ከወጪው 10% የበለጠ ነው፡፡ True, False, or Neither?
Answer: Neither
እንግዲህ , እኔ ስለ እሱ እያሰብኩኝ አልነበረም , ነገር ግን እኔ በጣም ተስፋ ቆርጨ ነበር, እና, እኔ እንደገና ከእርሱ ጋር ማውራት ጀመርኩ።
Question: ጥሩ ንግግር አድርገናል። True, False, or Neither?
Answer:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment