Merge branch 'big-refactor' into update_docs

767c58b9 · lintangsutawika · 3bfbddc4 · 759da8d5 · 767c58b9 · 767c58b9
Commit 767c58b9 authored Aug 16, 2023 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/xcopa/default_id.yaml
+++ b/lm_eval/tasks/xcopa/default_id.yaml
+include: default_et.yaml
+task: xcopa_id
+dataset_name: id
+doc_to_text: !function utils.doc_to_text_id
--- a/lm_eval/tasks/xcopa/default_it.yaml
+++ b/lm_eval/tasks/xcopa/default_it.yaml
+include: default_et.yaml
+task: xcopa_it
+dataset_name: it
+doc_to_text: !function utils.doc_to_text_it
--- a/lm_eval/tasks/xcopa/default_qu.yaml
+++ b/lm_eval/tasks/xcopa/default_qu.yaml
+include: default_et.yaml
+task: xcopa_qu
+dataset_name: qu
+doc_to_text: !function utils.doc_to_text_qu
--- a/lm_eval/tasks/xcopa/default_sw.yaml
+++ b/lm_eval/tasks/xcopa/default_sw.yaml
+include: default_et.yaml
+task: xcopa_sw
+dataset_name: sw
+doc_to_text: !function utils.doc_to_text_sw
--- a/lm_eval/tasks/xcopa/default_ta.yaml
+++ b/lm_eval/tasks/xcopa/default_ta.yaml
+include: default_et.yaml
+task: xcopa_ta
+dataset_name: ta
+doc_to_text: !function utils.doc_to_text_ta
--- a/lm_eval/tasks/xcopa/default_th.yaml
+++ b/lm_eval/tasks/xcopa/default_th.yaml
+include: default_et.yaml
+task: xcopa_th
+dataset_name: th
+doc_to_text: !function utils.doc_to_text_th
--- a/lm_eval/tasks/xcopa/default_tr.yaml
+++ b/lm_eval/tasks/xcopa/default_tr.yaml
+include: default_et.yaml
+task: xcopa_tr
+dataset_name: tr
+doc_to_text: !function utils.doc_to_text_tr
--- a/lm_eval/tasks/xcopa/default_vi.yaml
+++ b/lm_eval/tasks/xcopa/default_vi.yaml
+include: default_et.yaml
+task: xcopa_vi
+dataset_name: vi
+doc_to_text: !function utils.doc_to_text_vi
--- a/lm_eval/tasks/xcopa/default_zh.yaml
+++ b/lm_eval/tasks/xcopa/default_zh.yaml
+include: default_et.yaml
+task: xcopa_zh
+dataset_name: zh
+doc_to_text: !function utils.doc_to_text_zh
--- a/lm_eval/tasks/xcopa/utils.py
+++ b/lm_eval/tasks/xcopa/utils.py
+from functools import partial
+
+
+def convert_choice(choice):
+    """Return ``choice`` with its first character lowercased and the rest unchanged."""
+    # NOTE(review): choice[0] raises IndexError for an empty string; presumably
+    # the dataset never contains empty choices -- confirm.
+    return choice[0].lower() + choice[1:]
+
+
+def doc_to_text(doc, connector):
+    """
+    Build the prompt text: the premise followed by a cause/effect connector.
+
+    :param doc: Mapping providing the "premise" and "question" entries.
+    :param connector: Mapping from the value of doc["question"] to the connector
+        text appended after the premise.
+    :return: The stripped premise without its last character, then a space and
+        the selected connector.
+    """
+    # Select the connector that matches this document's question type.
+    conn = connector[doc["question"]]
+    # Drop the period: the slice removes the final character unconditionally,
+    # so this assumes every premise ends with one -- TODO confirm.
+    return doc["premise"].strip()[:-1] + f" {conn}"
+
+
+def doc_to_choice(doc):
+    """Return the two answer options (choice1, then choice2), each with its first character lowercased."""
+    return [convert_choice(doc["choice1"]), convert_choice(doc["choice2"])]
+
+
+# Per-language variants of doc_to_text. Each one binds `connector` to that
+# language's words for the "cause" and "effect" question types, so the result
+# can be called with just `doc`. The name suffix is the language code; the
+# task configs refer to these by name (e.g. default_id.yaml uses
+# `!function utils.doc_to_text_id`).
+doc_to_text_et = partial(
+    doc_to_text,
+    connector={
+        "cause": "sest",
+        "effect": "seetõttu",
+    },
+)
+
+
+doc_to_text_ht = partial(
+    doc_to_text,
+    connector={
+        "cause": "poukisa",
+        "effect": "donk sa",
+    },
+)
+
+
+doc_to_text_it = partial(
+    doc_to_text,
+    connector={
+        "cause": "perché",
+        "effect": "quindi",
+    },
+)
+
+
+doc_to_text_id = partial(
+    doc_to_text,
+    connector={
+        "cause": "karena",
+        "effect": "maka",
+    },
+)
+
+
+doc_to_text_qu = partial(
+    doc_to_text,
+    connector={
+        "cause": "imataq",
+        "effect": "chaymi",
+    },
+)
+
+
+doc_to_text_sw = partial(
+    doc_to_text,
+    connector={
+        "cause": "kwa sababu",
+        "effect": "kwa hiyo",
+    },
+)
+
+
+doc_to_text_zh = partial(
+    doc_to_text,
+    connector={
+        "cause": "因为",
+        "effect": "所以",
+    },
+)
+
+
+doc_to_text_ta = partial(
+    doc_to_text,
+    connector={
+        "cause": "காரணமாக",
+        "effect": "எனவே",
+    },
+)
+
+
+doc_to_text_th = partial(
+    doc_to_text,
+    connector={
+        "cause": "เพราะ",
+        "effect": "ดังนั้น",
+    },
+)
+
+
+doc_to_text_tr = partial(
+    doc_to_text,
+    connector={
+        "cause": "çünkü",
+        "effect": "bu yüzden",
+    },
+)
+
+
+doc_to_text_vi = partial(
+    doc_to_text,
+    connector={
+        "cause": "bởi vì",
+        "effect": "vì vậy",
+    },
+)
--- a/lm_eval/tasks/xnli/README.md
+++ b/lm_eval/tasks/xnli/README.md
+# XNLI
+
+### Paper
+
+Title: `XNLI: Evaluating Cross-lingual Sentence Representations`
+
+Abstract: https://arxiv.org/abs/1809.05053
+
+Based on the implementation of @yongzx (see https://github.com/EleutherAI/lm-evaluation-harness/pull/258)
+
+Prompt format (same as XGLM and mGPT):
+
+sentence1 + ", right? " + mask = (Yes|Also|No) + ", " + sentence2
+
+Prediction is the full sequence with the highest likelihood.
+
+Language specific prompts are translated word-by-word with Google Translate
+and may differ from the ones used by mGPT and XGLM (they do not provide their prompts).
+
+Homepage: https://github.com/facebookresearch/XNLI
+
+
+### Citation
+
+```
+@InProceedings{conneau2018xnli,
+  author = "Conneau, Alexis
+        and Rinott, Ruty
+        and Lample, Guillaume
+        and Williams, Adina
+        and Bowman, Samuel R.
+        and Schwenk, Holger
+        and Stoyanov, Veselin",
+  title = "XNLI: Evaluating Cross-lingual Sentence Representations",
+  booktitle = "Proceedings of the 2018 Conference on Empirical Methods
+               in Natural Language Processing",
+  year = "2018",
+  publisher = "Association for Computational Linguistics",
+  location = "Brussels, Belgium",
+}
+```
+
+### Groups and Tasks
+
+#### Groups
+
+* `xnli`
+
+#### Tasks
+
+* `xnli_ar`: Arabic
+* `xnli_bg`: Bulgarian
+* `xnli_de`: German
+* `xnli_el`: Greek
+* `xnli_en`: English
+* `xnli_es`: Spanish
+* `xnli_fr`: French
+* `xnli_hi`: Hindi
+* `xnli_ru`: Russian
+* `xnli_sw`: Swahili
+* `xnli_th`: Thai
+* `xnli_tr`: Turkish
+* `xnli_ur`: Urdu
+* `xnli_vi`: Vietnamese
+* `xnli_zh`: Chinese
+
+### Checklist
+
+For adding novel benchmarks/datasets to the library:
+* [ ] Is the task an existing benchmark in the literature?
+  * [ ] Have you referenced the original paper that introduced the task?
+  * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
+
+
+If other tasks on this dataset are already supported:
+* [ ] Is the "Main" variant of this task clearly denoted?
+* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
--- a/lm_eval/tasks/xnli/utils.py
+++ b/lm_eval/tasks/xnli/utils.py
+import argparse
+from typing import Dict, List
+
+import yaml
+
+
+# Languages that are part of xnli, keyed by language code.
+# The codes correspond to dataset names (Subsets) on HuggingFace.
+# A yaml file is generated by this script for each language.
+#
+# Each entry holds the translated prompt words used by gen_lang_yamls:
+#   QUESTION_WORD        inserted after the premise as ", <word>? "
+#   ENTAILMENT_LABEL     label used in the first choice
+#   NEUTRAL_LABEL        label used in the second choice
+#   CONTRADICTION_LABEL  label used in the third choice
+#
+# NOTE(review): some entries look like literal machine translations (e.g. the
+# Arabic CONTRADICTION_LABEL appears to be the noun "number" rather than "no").
+# Confirm before changing anything: the generated yaml files embed these
+# strings, so edits here change the prompts.
+
+LANGUAGES = {
+    "ar": {  # Arabic
+        "QUESTION_WORD": "صحيح",
+        "ENTAILMENT_LABEL": "نعم",
+        "NEUTRAL_LABEL": "لذا",
+        "CONTRADICTION_LABEL": "رقم",
+    },
+    "bg": {  # Bulgarian
+        "QUESTION_WORD": "правилно",
+        "ENTAILMENT_LABEL": "да",
+        "NEUTRAL_LABEL": "така",
+        "CONTRADICTION_LABEL": "не",
+    },
+    "de": {  # German
+        "QUESTION_WORD": "richtig",
+        "ENTAILMENT_LABEL": "Ja",
+        "NEUTRAL_LABEL": "Auch",
+        "CONTRADICTION_LABEL": "Nein",
+    },
+    "el": {  # Greek
+        "QUESTION_WORD": "σωστός",
+        "ENTAILMENT_LABEL": "Ναί",
+        "NEUTRAL_LABEL": "Έτσι",
+        "CONTRADICTION_LABEL": "όχι",
+    },
+    "en": {  # English
+        "QUESTION_WORD": "right",
+        "ENTAILMENT_LABEL": "Yes",
+        "NEUTRAL_LABEL": "Also",
+        "CONTRADICTION_LABEL": "No",
+    },
+    "es": {  # Spanish
+        "QUESTION_WORD": "correcto",
+        "ENTAILMENT_LABEL": "Sí",
+        "NEUTRAL_LABEL": "Asi que",
+        "CONTRADICTION_LABEL": "No",
+    },
+    "fr": {  # French
+        "QUESTION_WORD": "correct",
+        "ENTAILMENT_LABEL": "Oui",
+        "NEUTRAL_LABEL": "Aussi",
+        "CONTRADICTION_LABEL": "Non",
+    },
+    "hi": {  # Hindi
+        "QUESTION_WORD": "सही",
+        "ENTAILMENT_LABEL": "हाँ",
+        "NEUTRAL_LABEL": "इसलिए",
+        "CONTRADICTION_LABEL": "नहीं",
+    },
+    "ru": {  # Russian
+        "QUESTION_WORD": "правильно",
+        "ENTAILMENT_LABEL": "Да",
+        "NEUTRAL_LABEL": "Так",
+        "CONTRADICTION_LABEL": "Нет",
+    },
+    "sw": {  # Swahili
+        "QUESTION_WORD": "sahihi",
+        "ENTAILMENT_LABEL": "Ndiyo",
+        "NEUTRAL_LABEL": "Hivyo",
+        "CONTRADICTION_LABEL": "Hapana",
+    },
+    "th": {  # Thai
+        "QUESTION_WORD": "ถูกต้อง",
+        "ENTAILMENT_LABEL": "ใช่",
+        "NEUTRAL_LABEL": "ดังนั้น",
+        "CONTRADICTION_LABEL": "ไม่",
+    },
+    "tr": {  # Turkish
+        "QUESTION_WORD": "doğru",
+        "ENTAILMENT_LABEL": "Evet",
+        "NEUTRAL_LABEL": "Böylece",
+        "CONTRADICTION_LABEL": "Hayır",
+    },
+    "ur": {  # Urdu
+        "QUESTION_WORD": "صحیح",
+        "ENTAILMENT_LABEL": "جی ہاں",
+        "NEUTRAL_LABEL": "اس لئے",
+        "CONTRADICTION_LABEL": "نہیں",
+    },
+    "vi": {  # Vietnamese
+        "QUESTION_WORD": "đúng",
+        "ENTAILMENT_LABEL": "Vâng",
+        "NEUTRAL_LABEL": "Vì vậy",
+        "CONTRADICTION_LABEL": "Không",
+    },
+    "zh": {  # Chinese
+        "QUESTION_WORD": "正确",
+        "ENTAILMENT_LABEL": "是的",
+        "NEUTRAL_LABEL": "所以",
+        "CONTRADICTION_LABEL": "不是的",
+    },
+}
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
+    """
+    Generate a yaml file for each language.
+
+    One file named ``xnli_<lang>.yaml`` is written per key of ``LANGUAGES``.
+    Each file includes ``xnli_common_yaml`` and sets the language-specific
+    ``dataset_name``, ``task`` and ``doc_to_choice`` entries.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :raises FileExistsError: If ``overwrite`` is False and at least one target
+        file already exists. Raised after all languages have been attempted,
+        listing every file that was skipped.
+    """
+    # Names of files that already existed, collected so that all conflicts
+    # are reported together instead of stopping at the first one.
+    err = []
+    for lang in LANGUAGES.keys():
+        file_name = f"xnli_{lang}.yaml"
+        try:
+            QUESTION_WORD = LANGUAGES[lang]["QUESTION_WORD"]
+            ENTAILMENT_LABEL = LANGUAGES[lang]["ENTAILMENT_LABEL"]
+            NEUTRAL_LABEL = LANGUAGES[lang]["NEUTRAL_LABEL"]
+            CONTRADICTION_LABEL = LANGUAGES[lang]["CONTRADICTION_LABEL"]
+            # Mode "x" (exclusive creation) makes open() raise FileExistsError
+            # rather than overwrite an existing file.
+            with open(
+                f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                # doc_to_text is left empty: every choice string already
+                # contains the premise, question word, label and hypothesis.
+                # The doubled braces below are f-string escapes, so the value
+                # written out starts with "{{[" and ends with "]}}"
+                # (presumably a template expression evaluated by the harness).
+                yaml.dump(
+                    {
+                        "include": "xnli_common_yaml",
+                        "dataset_name": lang,
+                        "task": f"xnli_{lang}",
+                        "doc_to_text": "",
+                        "doc_to_choice": f"{{{{["
+                        f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
+                        f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
+                        f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
+                        f"]}}}}",
+                    },
+                    f,
+                    allow_unicode=True,
+                )
+        except FileExistsError:
+            # Remember the conflict and continue with the remaining languages.
+            err.append(file_name)
+
+    if len(err) > 0:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    # Boolean flag: absent means existing files are left untouched and reported.
+    parser.add_argument(
+        "--overwrite",
+        default=False,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    # Defaults to the current working directory.
+    parser.add_argument(
+        "--output-dir", default=".", help="Directory to write yaml files to"
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite)
+
+
+# Generate the yaml files only when this module is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/lm_eval/tasks/xnli/xnli_ar.yaml
+++ b/lm_eval/tasks/xnli/xnli_ar.yaml
+# Generated by utils.py
+dataset_name: ar
+doc_to_choice: '{{[premise+", صحيح? نعم, "+hypothesis,premise+", صحيح? لذا, "+hypothesis,premise+",
+  صحيح? رقم, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_ar
--- a/lm_eval/tasks/xnli/xnli_bg.yaml
+++ b/lm_eval/tasks/xnli/xnli_bg.yaml
+# Generated by utils.py
+dataset_name: bg
+doc_to_choice: '{{[premise+", правилно? да, "+hypothesis,premise+", правилно? така,
+  "+hypothesis,premise+", правилно? не, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_bg
--- a/lm_eval/tasks/xnli/xnli_common_yaml
+++ b/lm_eval/tasks/xnli/xnli_common_yaml
+# This file will be included in the generated language-specific task configs.
+# It doesn't have a yaml file extension as it is not meant to be imported directly
+# by the harness.
+group: xnli
+task: null
+dataset_path: xnli
+dataset_name: null
+output_type: multiple_choice
+training_split: train
+validation_split: validation
+doc_to_text: null
+doc_to_target: label
+doc_to_choice: null
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
--- a/lm_eval/tasks/xnli/xnli_de.yaml
+++ b/lm_eval/tasks/xnli/xnli_de.yaml
+# Generated by utils.py
+dataset_name: de
+doc_to_choice: '{{[premise+", richtig? Ja, "+hypothesis,premise+", richtig? Auch,
+  "+hypothesis,premise+", richtig? Nein, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_de
--- a/lm_eval/tasks/xnli/xnli_el.yaml
+++ b/lm_eval/tasks/xnli/xnli_el.yaml
+# Generated by utils.py
+dataset_name: el
+doc_to_choice: '{{[premise+", σωστός? Ναί, "+hypothesis,premise+", σωστός? Έτσι, "+hypothesis,premise+",
+  σωστός? όχι, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_el
--- a/lm_eval/tasks/xnli/xnli_en.yaml
+++ b/lm_eval/tasks/xnli/xnli_en.yaml
+# Generated by utils.py
+dataset_name: en
+doc_to_choice: '{{[premise+", right? Yes, "+hypothesis,premise+", right? Also, "+hypothesis,premise+",
+  right? No, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_en
--- a/lm_eval/tasks/xnli/xnli_es.yaml
+++ b/lm_eval/tasks/xnli/xnli_es.yaml
+# Generated by utils.py
+dataset_name: es
+doc_to_choice: '{{[premise+", correcto? Sí, "+hypothesis,premise+", correcto? Asi
+  que, "+hypothesis,premise+", correcto? No, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_es
--- a/lm_eval/tasks/xnli/xnli_fr.yaml
+++ b/lm_eval/tasks/xnli/xnli_fr.yaml
+# Generated by utils.py
+dataset_name: fr
+doc_to_choice: '{{[premise+", correct? Oui, "+hypothesis,premise+", correct? Aussi,
+  "+hypothesis,premise+", correct? Non, "+hypothesis]}}'
+doc_to_text: ''
+include: xnli_common_yaml
+task: xnli_fr