Commit 94563a36 authored by Eddy Yeo's avatar Eddy Yeo
Browse files

[Refactor] Migrate xwinograd tasks to yaml

parent 2820042d
......@@ -471,6 +471,9 @@ class Task(abc.ABC):
return labeled_examples + example
elif type(example) == list:
return [labeled_examples + ex for ex in example]
elif type(example) == int:
choices = self.doc_to_choice(doc)
return labeled_examples + choices[example]
def apply_filters(self):
......
# Task-name
### Paper
Title: `It's All in the Heads: Using Attention Heads as a Baseline for Cross-Lingual Transfer in Commonsense Reasoning`
Abstract: `https://arxiv.org/abs/2106.12066`
Multilingual winograd schema challenge that includes English, French, Japanese, Portuguese, Russian and Chinese. Winograd schema challenges come from the XWinograd dataset introduced in Tikhonov et al. As it only contains 16 Chinese schemas, we add 488 Chinese schemas from clue/cluewsc2020.
Homepage: `https://huggingface.co/datasets/Muennighoff/xwinograd`
### Citation
```
@misc{muennighoff2022crosslingual,
title={Crosslingual Generalization through Multitask Finetuning},
author={Niklas Muennighoff and Thomas Wang and Lintang Sutawika and Adam Roberts and Stella Biderman and Teven Le Scao and M Saiful Bari and Sheng Shen and Zheng-Xin Yong and Hailey Schoelkopf and Xiangru Tang and Dragomir Radev and Alham Fikri Aji and Khalid Almubarak and Samuel Albanie and Zaid Alyafeai and Albert Webson and Edward Raff and Colin Raffel},
year={2022},
eprint={2211.01786},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{tikhonov2021heads,
title={It's All in the Heads: Using Attention Heads as a Baseline for Cross-Lingual Transfer in Commonsense Reasoning},
author={Alexey Tikhonov and Max Ryabinin},
year={2021},
eprint={2106.12066},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
### Subtasks
List or describe tasks defined in this folder, and their names here:
* `xwinograd_en`: Winograd schema challenges in English.
* `xwinograd_fr`: Winograd schema challenges in French.
* `xwinograd_jp`: Winograd schema challenges in Japanese.
* `xwinograd_pt`: Winograd schema challenges in Portuguese.
* `xwinograd_ru`: Winograd schema challenges in Russian.
* `xwinograd_zh`: Winograd schema challenges in Chinese.
### Checklist
For adding novel benchmarks/datasets to the library:
* [x] Is the task an existing benchmark in the literature?
* [x] Have you referenced the original paper that introduced the task?
* [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
If other tasks on this dataset are already supported:
* [ ] Is the "Main" variant of this task clearly denoted?
* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
import argparse
from typing import Dict, List
import yaml
# Different languages that are part of xwinograd.
# These correspond to dataset names (Subsets) on HuggingFace.
# A yaml file is generated by this script for each language.
LANGUAGES = ["en", "fr", "jp", "pt", "ru", "zh"]
def doc_to_text(doc: Dict) -> int:
"""
Return index of the correct choice.
Note: We are using the "multiple input" mode of the multiple-choice
output-type, which means we use different contexts with the same target
for the different choices, rather than the same context and different targets.
"""
answer_to_num = {"1": 0, "2": 1}
return answer_to_num[doc["answer"]]
def doc_to_target(doc: Dict) -> str:
"""
Return the target completion.
Note that this does not depend on the correct choice as we are using
"multiple input" mode.
"""
idx = doc["sentence"].index("_") + 1
return doc["sentence"][idx:].strip()
def doc_to_choice(doc: Dict) -> List[str]:
"""Return the choices that will be used as contexts in "multiple input" mode."""
idx = doc["sentence"].index("_")
options = [doc["option1"], doc["option2"]]
return [doc["sentence"][:idx] + opt for opt in options]
def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
"""
Generate a yaml file for each language.
:param output_dir: The directory to output the files to.
:param overwrite: Whether to overwrite files if they already exist.
"""
err = []
for lang in LANGUAGES:
file_name = f"xwinograd_{lang}.yaml"
try:
with open(f"{output_dir}/{file_name}", "w" if overwrite else "x") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": "xwinograd_common_yaml",
"dataset_name": lang,
"task": f"xwinograd_{lang}",
},
f,
)
except FileExistsError:
err.append(file_name)
if len(err) > 0:
raise FileExistsError(
"Files were not created because they already exist (use --overwrite flag):"
f" {', '.join(err)}"
)
def main() -> None:
"""Parse CLI args and generate language-specific yaml files."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--overwrite",
default=False,
action="store_true",
help="Overwrite files if they already exist",
)
parser.add_argument(
"--output-dir", default=".", help="Directory to write yaml files to"
)
args = parser.parse_args()
gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite)
if __name__ == "__main__":
main()
# This file will be included in the generated language-specific task configs.
# It doesn't have a yaml file extension as it is not meant to be imported directly
# by the harness.
group:
- winograd
- commonsense
- multilingual
dataset_path: Muennighoff/xwinograd
dataset_name: null # Overridden by language-specific config.
output_type: multiple_choice
training_split: null
validation_split: null
test_split: test
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
doc_to_choice: !function utils.doc_to_choice
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
# Generated by utils.py
dataset_name: en
include: xwinograd_common_yaml
task: xwinograd_en
# Generated by utils.py
dataset_name: fr
include: xwinograd_common_yaml
task: xwinograd_fr
# Generated by utils.py
dataset_name: jp
include: xwinograd_common_yaml
task: xwinograd_jp
# Generated by utils.py
dataset_name: pt
include: xwinograd_common_yaml
task: xwinograd_pt
# Generated by utils.py
dataset_name: ru
include: xwinograd_common_yaml
task: xwinograd_ru
# Generated by utils.py
dataset_name: zh
include: xwinograd_common_yaml
task: xwinograd_zh
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment