Commit 40f81c42 authored by haileyschoelkopf's avatar haileyschoelkopf
Browse files

add translation tasks

parent 39bd5caf
# Translation Tasks
### Paper
### Citation
```
```
### Groups and Tasks
#### Groups
* `gpt3_translation_tasks`
* `wmt14`
* `wmt16`
* `wmt20`
* `iwslt17`
#### Tasks
*
### Checklist
For adding novel benchmarks/datasets to the library:
* [x] Is the task an existing benchmark in the literature?
* [ ] Have you referenced the original paper that introduced the task?
* [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
If other tasks on this dataset are already supported:
* [x] Is the "Main" variant of this task clearly denoted?
* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
* [ ] Checked for equivalence with v0.3.0 LM Evaluation Harness
# Generated by utils.py
dataset_name: iwslt2017-ar-en
dataset_path: iwslt2017
doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'Arabic phrase: {{translation["ar"]}}
English phrase:'
group:
- greedy_until
- translation
- iwslt2017
include: wmt_common_yaml
task: iwslt2017-ar-en
# Generated by utils.py
dataset_name: iwslt2017-en-ar
dataset_path: iwslt2017
doc_to_target: ' {{translation["ar"]}}'
doc_to_text: 'English phrase: {{translation["en"]}}
Arabic phrase:'
group:
- greedy_until
- translation
- iwslt2017
include: wmt_common_yaml
task: iwslt2017-en-ar
import argparse
from typing import Dict, List
import yaml
import sacrebleu
try:
import pycountry
except ModuleNotFoundError:
raise Exception(
"`pycountry` is required for generating translation task prompt templates. \
please install pycountry via pip install lm-eval[multilingua] or pip install -e .[multilingual]",
)
# Different translation benchmarks included in the library. Mostly WMT.
# These correspond to dataset names (subsets) on HuggingFace for each dataset.
# A yaml file is generated by this script for each language pair.
gpt3_translation_benchmarks = {
"wmt14": ["fr-en"], # ["en-fr", "fr-en"], # French
"wmt16": [
"ro-en",
"de-en",
], # ["en-ro", "ro-en", "de-en", "en-de"], # German, Romanian
}
# 28 total
LANGUAGES = {
**gpt3_translation_benchmarks,
# "wmt20": sacrebleu.get_langpairs_for_testset("wmt20"),
"iwslt2017": ["en-ar", "ar-en"], # Arabic
}
def code_to_language(code):
# key is alpha_2 or alpha_3 depending on the code length
language_tuple = pycountry.languages.get(**{f"alpha_{len(code)}": code})
return language_tuple.name
def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
"""
Generate a yaml file for each language.
:param output_dir: The directory to output the files to.
:param overwrite: Whether to overwrite files if they already exist.
"""
err = []
for lang in LANGUAGES.keys():
for lang_pair in LANGUAGES[lang]:
file_name = f"{lang}_{lang_pair}.yaml"
try:
src_lang, _, tgt_lang = lang_pair.partition("-")
source, target = code_to_language(src_lang), code_to_language(tgt_lang)
groups = ["greedy_until", "translation", lang]
if lang in gpt3_translation_benchmarks.keys():
groups += ["gpt3_translation_benchmarks"]
with open(
f"{output_dir}/{file_name}",
"w" if overwrite else "x",
encoding="utf8",
) as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": "wmt_common_yaml",
"group": groups,
"dataset_path": lang,
"dataset_name": lang_pair
if not (lang == "iwslt2017")
else "iwslt2017-" + lang_pair,
"task": f"{lang}-{lang_pair}",
"doc_to_text": f"{source} phrase: "
+ "{{translation["
+ f'"{src_lang}"'
+ "]}}\n"
+ f"{target} phrase:",
"doc_to_target": " {{"
+ "translation["
+ f'"{tgt_lang}"]'
+ "}}",
},
f,
)
except FileExistsError:
err.append(file_name)
if len(err) > 0:
raise FileExistsError(
"Files were not created because they already exist (use --overwrite flag):"
f" {', '.join(err)}"
)
def main() -> None:
"""Parse CLI args and generate language-specific yaml files."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--overwrite",
default=False,
action="store_true",
help="Overwrite files if they already exist",
)
parser.add_argument(
"--output-dir", default=".", help="Directory to write yaml files to"
)
args = parser.parse_args()
gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite)
if __name__ == "__main__":
main()
# Generated by utils.py
dataset_name: fr-en
dataset_path: wmt14
doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'French phrase: {{translation["fr"]}}
English phrase:'
group:
- greedy_until
- translation
- wmt14
- gpt3_translation_benchmarks
include: wmt_common_yaml
task: wmt14-fr-en
# Generated by utils.py
dataset_name: de-en
dataset_path: wmt16
doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'German phrase: {{translation["de"]}}
English phrase:'
group:
- greedy_until
- translation
- wmt16
- gpt3_translation_benchmarks
include: wmt_common_yaml
task: wmt16-de-en
# Generated by utils.py
dataset_name: ro-en
dataset_path: wmt16
doc_to_target: ' {{translation["en"]}}'
doc_to_text: 'Romanian phrase: {{translation["ro"]}}
English phrase:'
group:
- greedy_until
- translation
- wmt16
- gpt3_translation_benchmarks
include: wmt_common_yaml
task: wmt16-ro-en
output_type: greedy_until
training_split: train
validation_split: validation
fewshot_split: validation
test_split: test
metric_list:
- metric: bleu
- metric: ter
- metric: chrf
generation_kwargs:
until:
- "\n"
do_sample: false
temperature: 0.0
repeats: 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment