Unverified Commit f918c8fd authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

Merge pull request #776 from EleutherAI/xnli

[Refactor] XNLI
parents 054ed37f 1768f118
......@@ -55,7 +55,7 @@ Boxes should be checked iff tasks are implemented in the refactor and tested for
- [ ] XStoryCloze (Lintang)
- [x] XWinograd
- [ ] PAWS-X (Lintang)
- [ ] XNLI (Lintang)
- [x] XNLI
- [ ] MGSM (Lintang)
- [ ] SCROLLS
- [x] Babi
......
# XNLI
### Paper
Title: `XNLI: Evaluating Cross-lingual Sentence Representations`
Abstract: https://arxiv.org/abs/1809.05053
Based on the implementation of @yongzx (see https://github.com/EleutherAI/lm-evaluation-harness/pull/258)
Prompt format (same as XGLM and mGPT):
sentence1 + ", right? " + mask = (Yes|Also|No) + ", " + sentence2
Prediction is the full sequence with the highest likelihood.
Language-specific prompts are translated word-by-word with Google Translate
and may differ from the ones used by mGPT and XGLM (they do not provide their prompts).
Homepage: https://github.com/facebookresearch/XNLI
### Citation
```
@InProceedings{conneau2018xnli,
author = "Conneau, Alexis
and Rinott, Ruty
and Lample, Guillaume
and Williams, Adina
and Bowman, Samuel R.
and Schwenk, Holger
and Stoyanov, Veselin",
title = "XNLI: Evaluating Cross-lingual Sentence Representations",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods
in Natural Language Processing",
year = "2018",
publisher = "Association for Computational Linguistics",
location = "Brussels, Belgium",
}
```
### Groups and Tasks
#### Groups
* `xnli`
#### Tasks
* `xnli_ar`: Arabic
* `xnli_bg`: Bulgarian
* `xnli_de`: German
* `xnli_el`: Greek
* `xnli_en`: English
* `xnli_es`: Spanish
* `xnli_fr`: French
* `xnli_hi`: Hindi
* `xnli_ru`: Russian
* `xnli_sw`: Swahili
* `xnli_th`: Thai
* `xnli_tr`: Turkish
* `xnli_ur`: Urdu
* `xnli_vi`: Vietnamese
* `xnli_zh`: Chinese
### Checklist
For adding novel benchmarks/datasets to the library:
* [ ] Is the task an existing benchmark in the literature?
* [ ] Have you referenced the original paper that introduced the task?
* [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
If other tasks on this dataset are already supported:
* [ ] Is the "Main" variant of this task clearly denoted?
* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
import argparse
from typing import Dict, List
import yaml
# Different languages that are part of xnli.
# These correspond to dataset names (Subsets) on HuggingFace.
# A yaml file is generated by this script for each language.
# Each entry maps a language code to the four words that gen_lang_yamls()
# splices into the prompt template
#     premise + ", {QUESTION_WORD}? {<label>}, " + hypothesis
# producing one candidate sequence per label, in the order
# entailment, neutral, contradiction.
# NOTE(review): these strings are written verbatim into the generated task
# configs, so editing any of them changes the evaluated prompts. Some entries
# look like literal machine translations (e.g. the "ar" CONTRADICTION_LABEL,
# or the "es" NEUTRAL_LABEL written without an accent) -- confirm with a
# native speaker, and regenerate the yaml files, before altering them.
LANGUAGES = {
    "ar": {  # Arabic
        "QUESTION_WORD": "صحيح",
        "ENTAILMENT_LABEL": "نعم",
        "NEUTRAL_LABEL": "لذا",
        "CONTRADICTION_LABEL": "رقم",
    },
    "bg": {  # Bulgarian
        "QUESTION_WORD": "правилно",
        "ENTAILMENT_LABEL": "да",
        "NEUTRAL_LABEL": "така",
        "CONTRADICTION_LABEL": "не",
    },
    "de": {  # German
        "QUESTION_WORD": "richtig",
        "ENTAILMENT_LABEL": "Ja",
        "NEUTRAL_LABEL": "Auch",
        "CONTRADICTION_LABEL": "Nein",
    },
    "el": {  # Greek
        "QUESTION_WORD": "σωστός",
        "ENTAILMENT_LABEL": "Ναί",
        "NEUTRAL_LABEL": "Έτσι",
        "CONTRADICTION_LABEL": "όχι",
    },
    "en": {  # English
        "QUESTION_WORD": "right",
        "ENTAILMENT_LABEL": "Yes",
        "NEUTRAL_LABEL": "Also",
        "CONTRADICTION_LABEL": "No",
    },
    "es": {  # Spanish
        "QUESTION_WORD": "correcto",
        "ENTAILMENT_LABEL": "Sí",
        "NEUTRAL_LABEL": "Asi que",
        "CONTRADICTION_LABEL": "No",
    },
    "fr": {  # French
        "QUESTION_WORD": "correct",
        "ENTAILMENT_LABEL": "Oui",
        "NEUTRAL_LABEL": "Aussi",
        "CONTRADICTION_LABEL": "Non",
    },
    "hi": {  # Hindi
        "QUESTION_WORD": "सही",
        "ENTAILMENT_LABEL": "हाँ",
        "NEUTRAL_LABEL": "इसलिए",
        "CONTRADICTION_LABEL": "नहीं",
    },
    "ru": {  # Russian
        "QUESTION_WORD": "правильно",
        "ENTAILMENT_LABEL": "Да",
        "NEUTRAL_LABEL": "Так",
        "CONTRADICTION_LABEL": "Нет",
    },
    "sw": {  # Swahili
        "QUESTION_WORD": "sahihi",
        "ENTAILMENT_LABEL": "Ndiyo",
        "NEUTRAL_LABEL": "Hivyo",
        "CONTRADICTION_LABEL": "Hapana",
    },
    "th": {  # Thai
        "QUESTION_WORD": "ถูกต้อง",
        "ENTAILMENT_LABEL": "ใช่",
        "NEUTRAL_LABEL": "ดังนั้น",
        "CONTRADICTION_LABEL": "ไม่",
    },
    "tr": {  # Turkish
        "QUESTION_WORD": "doğru",
        "ENTAILMENT_LABEL": "Evet",
        "NEUTRAL_LABEL": "Böylece",
        "CONTRADICTION_LABEL": "Hayır",
    },
    "ur": {  # Urdu
        "QUESTION_WORD": "صحیح",
        "ENTAILMENT_LABEL": "جی ہاں",
        "NEUTRAL_LABEL": "اس لئے",
        "CONTRADICTION_LABEL": "نہیں",
    },
    "vi": {  # Vietnamese
        "QUESTION_WORD": "đúng",
        "ENTAILMENT_LABEL": "Vâng",
        "NEUTRAL_LABEL": "Vì vậy",
        "CONTRADICTION_LABEL": "Không",
    },
    "zh": {  # Chinese
        "QUESTION_WORD": "正确",
        "ENTAILMENT_LABEL": "是的",
        "NEUTRAL_LABEL": "所以",
        "CONTRADICTION_LABEL": "不是的",
    },
}
def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
    """
    Generate a yaml file for each language.

    One ``xnli_<lang>.yaml`` is written per entry in ``LANGUAGES``. Each file
    includes ``xnli_common_yaml`` and supplies the language-specific
    ``dataset_name``, ``task`` and ``doc_to_choice`` template (three full
    candidate sequences, ordered entailment, neutral, contradiction).

    :param output_dir: The directory to output the files to.
    :param overwrite: Whether to overwrite files if they already exist.
    :raises FileExistsError: If ``overwrite`` is false and at least one target
        file already exists. Every language is still attempted first, so
        files that did not exist yet are created before the error is raised.
    """
    # "x" is exclusive creation: open() raises FileExistsError rather than
    # silently clobbering an existing file.
    mode = "w" if overwrite else "x"
    label_keys = ("ENTAILMENT_LABEL", "NEUTRAL_LABEL", "CONTRADICTION_LABEL")
    already_present = []

    for lang, words in LANGUAGES.items():
        file_name = f"xnli_{lang}.yaml"
        question_word = words["QUESTION_WORD"]
        # One Jinja string-concatenation expression per label; joined below
        # into a Jinja list literal wrapped in "{{ ... }}".
        choices = ",".join(
            f'premise+", {question_word}? {words[key]}, "+hypothesis'
            for key in label_keys
        )
        config = {
            "include": "xnli_common_yaml",
            "dataset_name": lang,
            "task": f"xnli_{lang}",
            "doc_to_text": "",
            "doc_to_choice": f"{{{{[{choices}]}}}}",
        }
        # Keep the try body down to the statements around the open() call,
        # which is the only place the handled FileExistsError can come from.
        try:
            with open(f"{output_dir}/{file_name}", mode, encoding="utf8") as f:
                f.write("# Generated by utils.py\n")
                # allow_unicode keeps the non-ASCII prompt words readable
                # instead of emitting escape sequences.
                yaml.dump(config, f, allow_unicode=True)
        except FileExistsError:
            already_present.append(file_name)

    if already_present:
        raise FileExistsError(
            "Files were not created because they already exist (use --overwrite flag):"
            f" {', '.join(already_present)}"
        )
def main() -> None:
    """Read the command-line options and emit the per-language yaml configs."""
    cli = argparse.ArgumentParser()
    # Flag is off unless given, so existing files are protected by default.
    cli.add_argument(
        "--overwrite",
        action="store_true",
        default=False,
        help="Overwrite files if they already exist",
    )
    # Destination defaults to the current working directory.
    cli.add_argument(
        "--output-dir",
        default=".",
        help="Directory to write yaml files to",
    )
    options = vars(cli.parse_args())
    gen_lang_yamls(
        output_dir=options["output_dir"],
        overwrite=options["overwrite"],
    )


# Only run the generator when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Generated by utils.py
dataset_name: ar
doc_to_choice: '{{[premise+", صحيح? نعم, "+hypothesis,premise+", صحيح? لذا, "+hypothesis,premise+",
صحيح? رقم, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_ar
# Generated by utils.py
dataset_name: bg
doc_to_choice: '{{[premise+", правилно? да, "+hypothesis,premise+", правилно? така,
"+hypothesis,premise+", правилно? не, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_bg
# This file will be included in the generated language-specific task configs.
# It doesn't have a yaml file extension as it is not meant to be imported directly
# by the harness.
group: xnli
# Placeholder: each generated xnli_<lang>.yaml sets its own task name.
task: null
dataset_path: xnli
# Placeholder: each generated xnli_<lang>.yaml sets this to its language code.
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
# Placeholder: the generated configs set this to an empty string, because the
# whole prompt (premise, question word, label, hypothesis) lives in each choice.
doc_to_text: null
# NOTE(review): presumably `label` is an index into the doc_to_choice list
# (generated in the order entailment, neutral, contradiction) -- confirm
# against the dataset's label encoding.
doc_to_target: label
# Placeholder: each generated config supplies three full-sequence candidates.
doc_to_choice: null
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
# Generated by utils.py
dataset_name: de
doc_to_choice: '{{[premise+", richtig? Ja, "+hypothesis,premise+", richtig? Auch,
"+hypothesis,premise+", richtig? Nein, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_de
# Generated by utils.py
dataset_name: el
doc_to_choice: '{{[premise+", σωστός? Ναί, "+hypothesis,premise+", σωστός? Έτσι, "+hypothesis,premise+",
σωστός? όχι, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_el
# Generated by utils.py
dataset_name: en
doc_to_choice: '{{[premise+", right? Yes, "+hypothesis,premise+", right? Also, "+hypothesis,premise+",
right? No, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_en
# Generated by utils.py
dataset_name: es
doc_to_choice: '{{[premise+", correcto? Sí, "+hypothesis,premise+", correcto? Asi
que, "+hypothesis,premise+", correcto? No, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_es
# Generated by utils.py
dataset_name: fr
doc_to_choice: '{{[premise+", correct? Oui, "+hypothesis,premise+", correct? Aussi,
"+hypothesis,premise+", correct? Non, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_fr
# Generated by utils.py
dataset_name: hi
doc_to_choice: '{{[premise+", सही? हाँ, "+hypothesis,premise+", सही? इसलिए, "+hypothesis,premise+",
सही? नहीं, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_hi
# Generated by utils.py
dataset_name: ru
doc_to_choice: '{{[premise+", правильно? Да, "+hypothesis,premise+", правильно? Так,
"+hypothesis,premise+", правильно? Нет, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_ru
# Generated by utils.py
dataset_name: sw
doc_to_choice: '{{[premise+", sahihi? Ndiyo, "+hypothesis,premise+", sahihi? Hivyo,
"+hypothesis,premise+", sahihi? Hapana, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_sw
# Generated by utils.py
dataset_name: th
doc_to_choice: '{{[premise+", ถูกต้อง? ใช่, "+hypothesis,premise+", ถูกต้อง? ดังนั้น,
"+hypothesis,premise+", ถูกต้อง? ไม่, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_th
# Generated by utils.py
dataset_name: tr
doc_to_choice: '{{[premise+", doğru? Evet, "+hypothesis,premise+", doğru? Böylece,
"+hypothesis,premise+", doğru? Hayır, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_tr
# Generated by utils.py
dataset_name: ur
doc_to_choice: '{{[premise+", صحیح? جی ہاں, "+hypothesis,premise+", صحیح? اس لئے,
"+hypothesis,premise+", صحیح? نہیں, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_ur
# Generated by utils.py
dataset_name: vi
doc_to_choice: '{{[premise+", đúng? Vâng, "+hypothesis,premise+", đúng? vậy, "+hypothesis,premise+",
đúng? Không, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_vi
# Generated by utils.py
dataset_name: vi
doc_to_choice: '{{[premise+", đúng? Vâng, "+hypothesis,premise+", đúng? Vì vậy, "+hypothesis,premise+",
  đúng? Không, "+hypothesis]}}'
doc_to_text: ''
include: xnli_common_yaml
task: xnli_vi
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment