Commit d42d5423 authored by jordiclive's avatar jordiclive
Browse files

adding challenge sets

parent 9cd70235
...@@ -110,10 +110,10 @@ TASK_REGISTRY = { ...@@ -110,10 +110,10 @@ TASK_REGISTRY = {
"wsc": superglue.SGWinogradSchemaChallenge, "wsc": superglue.SGWinogradSchemaChallenge,
# Order by benchmark/genre? # Order by benchmark/genre?
"coqa": coqa.CoQA, "coqa": coqa.CoQA,
"GEM/web_nlg": gem_webnlg.WebNLG,
"drop": drop.DROP, "drop": drop.DROP,
"lambada": lambada.LAMBADA, "lambada": lambada.LAMBADA,
"lambada_cloze": lambada_cloze.LAMBADA_cloze, "lambada_cloze": lambada_cloze.LAMBADA_cloze,
**gem_webnlg.construct_tasks(),
# multilingual lambada # multilingual lambada
**lambada_multilingual.construct_tasks(), **lambada_multilingual.construct_tasks(),
"wikitext": wikitext.WikiText, "wikitext": wikitext.WikiText,
......
"""
The 2020 Bilingual, Bi-Directional WebNLG+ Shared Task:
Overview and Evaluation Results (WebNLG+ 2020)
https://aclanthology.org/2020.webnlg-1.7/
WebNLG+ offers two challenges: (i) mapping sets of RDF triples
to English or Russian text (generation) and (ii) converting
English or Russian text to sets of RDF triples (semantic parsing).
Compared to the eponymous WebNLG challenge, WebNLG+ provides an
extended dataset that enables the training, evaluation, and
comparison of microplanners and semantic parsers. In this paper,
we present the results of the generation and semantic parsing
task for both English and Russian and provide a brief
description of the participating systems.
"""
from lm_eval.base import PromptSourceTask from lm_eval.base import PromptSourceTask
_CITATION = """
@inproceedings{castro-ferreira-etal-2020-2020,
title = "The 2020 Bilingual, Bi-Directional {W}eb{NLG}+ Shared Task: Overview and Evaluation Results ({W}eb{NLG}+ 2020)",
author = "Castro Ferreira, Thiago and
Gardent, Claire and
Ilinykh, Nikolai and
van der Lee, Chris and
Mille, Simon and
Moussallem, Diego and
Shimorina, Anastasia",
booktitle = "Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+)",
month = "12",
year = "2020",
address = "Dublin, Ireland (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.webnlg-1.7",
pages = "55--76",
abstract = "WebNLG+ offers two challenges: (i) mapping sets of RDF triples to English or Russian text (generation) and (ii) converting English or Russian text to sets of RDF triples (semantic parsing). Compared to the eponymous WebNLG challenge, WebNLG+ provides an extended dataset that enable the training, evaluation, and comparison of microplanners and semantic parsers. In this paper, we present the results of the generation and semantic parsing task for both English and Russian and provide a brief description of the participating systems.",
}
"""
class WebNLG(PromptSourceTask): class WebNLG(PromptSourceTask):
VERSION = 0 VERSION = 0
DATASET_PATH = "GEM/web_nlg" DATASET_PATH = "GEM/web_nlg"
DATASET_NAME = "en" DATASET_NAME = "en"
SPLIT = None
def has_training_docs(self): def has_training_docs(self):
return False return False
...@@ -27,11 +65,71 @@ class WebNLG(PromptSourceTask): ...@@ -27,11 +65,71 @@ class WebNLG(PromptSourceTask):
def test_docs(self): def test_docs(self):
if self.has_test_docs(): if self.has_test_docs():
return self.dataset["test"] if self.SPLIT is not None:
return self.dataset[str(self.SPLIT)]
else:
return self.dataset["test"]
def stopping_criteria(self): def stopping_criteria(self):
return '*' return None
def max_generation_length(self): def max_generation_length(self):
return 250 return 250
# def higher_is_better(self):
# return {"bleu": True, "rouge": True}
class WebNLGRu(WebNLG):
DATASET_NAME = "ru"
## En Challenge Sets
class WebNLGEn1(WebNLG):
SPLIT = "challenge_validation_sample"
class WebNLGEn2(WebNLG):
SPLIT = "challenge_test_scramble"
class WebNLGEn3(WebNLG):
SPLIT = "challenge_test_numbers"
## Ru Challenge sets
class WebNLGRu1(WebNLG):
DATASET_NAME = "ru"
SPLIT = "challenge_validation_sample"
class WebNLGRu2(WebNLG):
DATASET_NAME = "ru"
SPLIT = "challenge_test_scramble"
WEBNLG_CLASSES = [
WebNLG,
WebNLGRu,
WebNLGEn1,
WebNLGEn2,
WebNLGEn3,
WebNLGRu1,
WebNLGRu2,
]
def construct_tasks():
tasks = {}
for webnlg_class in WEBNLG_CLASSES:
if webnlg_class.SPLIT is None:
tasks[f"GEM/web_nlg_{webnlg_class.DATASET_NAME}"] = webnlg_class
else:
tasks[
f"GEM/web_nlg_{webnlg_class.DATASET_NAME}_{webnlg_class.SPLIT}"
] = webnlg_class
return tasks
...@@ -11,14 +11,14 @@ EXAMPLE_DIVIDER = "!!@@##@@!! -- Example {i}\n" ...@@ -11,14 +11,14 @@ EXAMPLE_DIVIDER = "!!@@##@@!! -- Example {i}\n"
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--output_base_path', required=True) parser.add_argument("--output_base_path", required=True)
parser.add_argument('--tasks', default="all_tasks") parser.add_argument("--tasks", default="all_tasks")
parser.add_argument('--provide_description', action="store_true") parser.add_argument("--provide_description", action="store_true")
parser.add_argument('--sets', type=str, default="val") # example: val,test parser.add_argument("--sets", type=str, default="val") # example: val,test
parser.add_argument('--num_fewshot', type=int, default=1) parser.add_argument("--num_fewshot", type=int, default=1)
parser.add_argument('--seed', type=int, default=42) parser.add_argument("--seed", type=int, default=42)
parser.add_argument('--num_examples', type=int, default=1) parser.add_argument("--num_examples", type=int, default=1)
parser.add_argument('--description_dict_path', default=None) parser.add_argument("--description_dict_path", default=None)
return parser.parse_args() return parser.parse_args()
...@@ -34,7 +34,7 @@ def main(): ...@@ -34,7 +34,7 @@ def main():
description_dict = {} description_dict = {}
if args.description_dict_path: if args.description_dict_path:
with open(args.description_dict_path, 'r') as f: with open(args.description_dict_path, "r") as f:
description_dict = json.load(f) description_dict = json.load(f)
os.makedirs(args.output_base_path, exist_ok=True) os.makedirs(args.output_base_path, exist_ok=True)
...@@ -45,26 +45,34 @@ def main(): ...@@ -45,26 +45,34 @@ def main():
iters = [] iters = []
for set in args.sets.split(","): for set in args.sets.split(","):
if set == 'train' and task.has_training_docs(): if set == "train" and task.has_training_docs():
docs = task.training_docs() docs = task.training_docs()
if set == 'val' and task.has_validation_docs(): if set == "val" and task.has_validation_docs():
docs = task.validation_docs() docs = task.validation_docs()
if set == 'test' and task.has_test_docs(): if set == "test" and task.has_test_docs():
docs = task.test_docs() docs = task.test_docs()
iters.append(docs) iters.append(docs)
docs = join_iters(iters) docs = join_iters(iters)
description = description_dict[task_name] if description_dict and task_name in description_dict else "" description = (
task_name = task_name.replace('/','_') description_dict[task_name]
if description_dict and task_name in description_dict
else ""
)
task_name = task_name.replace("/", "_")
with open(os.path.join(args.output_base_path, task_name), "w") as f: with open(os.path.join(args.output_base_path, task_name), "w") as f:
for i, doc in zip(range(args.num_examples), docs) if args.num_examples > 0 else enumerate(docs): for i, doc in (
zip(range(args.num_examples), docs)
if args.num_examples > 0
else enumerate(docs)
):
f.write(EXAMPLE_DIVIDER.format(i=i)) f.write(EXAMPLE_DIVIDER.format(i=i))
ctx, _ = task.fewshot_context( ctx, _ = task.fewshot_context(
doc=doc, doc=doc,
num_fewshot=args.num_fewshot, num_fewshot=args.num_fewshot,
rnd=rnd, rnd=rnd,
description=description description=description,
) )
f.write(ctx + "\n") f.write(ctx + "\n")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment