Commit d42d5423 authored by jordiclive's avatar jordiclive
Browse files

adding challenge sets

parent 9cd70235
...@@ -110,10 +110,10 @@ TASK_REGISTRY = { ...@@ -110,10 +110,10 @@ TASK_REGISTRY = {
"wsc": superglue.SGWinogradSchemaChallenge, "wsc": superglue.SGWinogradSchemaChallenge,
# Order by benchmark/genre? # Order by benchmark/genre?
"coqa": coqa.CoQA, "coqa": coqa.CoQA,
"GEM/web_nlg": gem_webnlg.WebNLG,
"drop": drop.DROP, "drop": drop.DROP,
"lambada": lambada.LAMBADA, "lambada": lambada.LAMBADA,
"lambada_cloze": lambada_cloze.LAMBADA_cloze, "lambada_cloze": lambada_cloze.LAMBADA_cloze,
**gem_webnlg.construct_tasks(),
# multilingual lambada # multilingual lambada
**lambada_multilingual.construct_tasks(), **lambada_multilingual.construct_tasks(),
"wikitext": wikitext.WikiText, "wikitext": wikitext.WikiText,
......
"""
The 2020 Bilingual, Bi-Directional WebNLG+ Shared Task:
Overview and Evaluation Results (WebNLG+ 2020)
https://aclanthology.org/2020.webnlg-1.7/
WebNLG+ offers two challenges: (i) mapping sets of RDF triples
to English or Russian text (generation) and (ii) converting
English or Russian text to sets of RDF triples (semantic parsing).
Compared to the eponymous WebNLG challenge, WebNLG+ provides an
extended dataset that enables the training, evaluation, and
comparison of microplanners and semantic parsers. In this paper,
we present the results of the generation and semantic parsing
task for both English and Russian and provide a brief
description of the participating systems.
"""
from lm_eval.base import PromptSourceTask from lm_eval.base import PromptSourceTask
_CITATION = """
@inproceedings{castro-ferreira-etal-2020-2020,
title = "The 2020 Bilingual, Bi-Directional {W}eb{NLG}+ Shared Task: Overview and Evaluation Results ({W}eb{NLG}+ 2020)",
author = "Castro Ferreira, Thiago and
Gardent, Claire and
Ilinykh, Nikolai and
van der Lee, Chris and
Mille, Simon and
Moussallem, Diego and
Shimorina, Anastasia",
booktitle = "Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+)",
month = "12",
year = "2020",
address = "Dublin, Ireland (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.webnlg-1.7",
pages = "55--76",
abstract = "WebNLG+ offers two challenges: (i) mapping sets of RDF triples to English or Russian text (generation) and (ii) converting English or Russian text to sets of RDF triples (semantic parsing). Compared to the eponymous WebNLG challenge, WebNLG+ provides an extended dataset that enable the training, evaluation, and comparison of microplanners and semantic parsers. In this paper, we present the results of the generation and semantic parsing task for both English and Russian and provide a brief description of the participating systems.",
}
"""
class WebNLG(PromptSourceTask): class WebNLG(PromptSourceTask):
VERSION = 0 VERSION = 0
DATASET_PATH = "GEM/web_nlg" DATASET_PATH = "GEM/web_nlg"
DATASET_NAME = "en" DATASET_NAME = "en"
SPLIT = None
def has_training_docs(self): def has_training_docs(self):
return False return False
...@@ -27,11 +65,71 @@ class WebNLG(PromptSourceTask): ...@@ -27,11 +65,71 @@ class WebNLG(PromptSourceTask):
def test_docs(self): def test_docs(self):
if self.has_test_docs(): if self.has_test_docs():
return self.dataset["test"] if self.SPLIT is not None:
return self.dataset[str(self.SPLIT)]
else:
return self.dataset["test"]
def stopping_criteria(self): def stopping_criteria(self):
return '*' return None
def max_generation_length(self): def max_generation_length(self):
return 250 return 250
# def higher_is_better(self):
# return {"bleu": True, "rouge": True}
class WebNLGRu(WebNLG):
DATASET_NAME = "ru"
## En Challenge Sets
class WebNLGEn1(WebNLG):
SPLIT = "challenge_validation_sample"
class WebNLGEn2(WebNLG):
SPLIT = "challenge_test_scramble"
class WebNLGEn3(WebNLG):
SPLIT = "challenge_test_numbers"
## Ru Challenge sets
class WebNLGRu1(WebNLG):
DATASET_NAME = "ru"
SPLIT = "challenge_validation_sample"
class WebNLGRu2(WebNLG):
DATASET_NAME = "ru"
SPLIT = "challenge_test_scramble"
WEBNLG_CLASSES = [
WebNLG,
WebNLGRu,
WebNLGEn1,
WebNLGEn2,
WebNLGEn3,
WebNLGRu1,
WebNLGRu2,
]
def construct_tasks():
tasks = {}
for webnlg_class in WEBNLG_CLASSES:
if webnlg_class.SPLIT is None:
tasks[f"GEM/web_nlg_{webnlg_class.DATASET_NAME}"] = webnlg_class
else:
tasks[
f"GEM/web_nlg_{webnlg_class.DATASET_NAME}_{webnlg_class.SPLIT}"
] = webnlg_class
return tasks
...@@ -11,14 +11,14 @@ EXAMPLE_DIVIDER = "!!@@##@@!! -- Example {i}\n" ...@@ -11,14 +11,14 @@ EXAMPLE_DIVIDER = "!!@@##@@!! -- Example {i}\n"
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--output_base_path', required=True) parser.add_argument("--output_base_path", required=True)
parser.add_argument('--tasks', default="all_tasks") parser.add_argument("--tasks", default="all_tasks")
parser.add_argument('--provide_description', action="store_true") parser.add_argument("--provide_description", action="store_true")
parser.add_argument('--sets', type=str, default="val") # example: val,test parser.add_argument("--sets", type=str, default="val") # example: val,test
parser.add_argument('--num_fewshot', type=int, default=1) parser.add_argument("--num_fewshot", type=int, default=1)
parser.add_argument('--seed', type=int, default=42) parser.add_argument("--seed", type=int, default=42)
parser.add_argument('--num_examples', type=int, default=1) parser.add_argument("--num_examples", type=int, default=1)
parser.add_argument('--description_dict_path', default=None) parser.add_argument("--description_dict_path", default=None)
return parser.parse_args() return parser.parse_args()
...@@ -34,7 +34,7 @@ def main(): ...@@ -34,7 +34,7 @@ def main():
description_dict = {} description_dict = {}
if args.description_dict_path: if args.description_dict_path:
with open(args.description_dict_path, 'r') as f: with open(args.description_dict_path, "r") as f:
description_dict = json.load(f) description_dict = json.load(f)
os.makedirs(args.output_base_path, exist_ok=True) os.makedirs(args.output_base_path, exist_ok=True)
...@@ -45,26 +45,34 @@ def main(): ...@@ -45,26 +45,34 @@ def main():
iters = [] iters = []
for set in args.sets.split(","): for set in args.sets.split(","):
if set == 'train' and task.has_training_docs(): if set == "train" and task.has_training_docs():
docs = task.training_docs() docs = task.training_docs()
if set == 'val' and task.has_validation_docs(): if set == "val" and task.has_validation_docs():
docs = task.validation_docs() docs = task.validation_docs()
if set == 'test' and task.has_test_docs(): if set == "test" and task.has_test_docs():
docs = task.test_docs() docs = task.test_docs()
iters.append(docs) iters.append(docs)
docs = join_iters(iters) docs = join_iters(iters)
description = description_dict[task_name] if description_dict and task_name in description_dict else "" description = (
task_name = task_name.replace('/','_') description_dict[task_name]
if description_dict and task_name in description_dict
else ""
)
task_name = task_name.replace("/", "_")
with open(os.path.join(args.output_base_path, task_name), "w") as f: with open(os.path.join(args.output_base_path, task_name), "w") as f:
for i, doc in zip(range(args.num_examples), docs) if args.num_examples > 0 else enumerate(docs): for i, doc in (
zip(range(args.num_examples), docs)
if args.num_examples > 0
else enumerate(docs)
):
f.write(EXAMPLE_DIVIDER.format(i=i)) f.write(EXAMPLE_DIVIDER.format(i=i))
ctx, _ = task.fewshot_context( ctx, _ = task.fewshot_context(
doc=doc, doc=doc,
num_fewshot=args.num_fewshot, num_fewshot=args.num_fewshot,
rnd=rnd, rnd=rnd,
description=description description=description,
) )
f.write(ctx + "\n") f.write(ctx + "\n")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment