Unverified commit 9d60dd76 authored by Charles Lovering, committed by GitHub

Merge pull request #19 from kasnerz/kasnerz/generation_tasks

Add E2E NLG Cleaned, update required Transformers version
parents d47f4534 57737691
lm_eval/tasks/__init__.py
@@ -56,8 +56,8 @@ from . import hans
 from . import gem_webnlg
 from . import gem_xsum
 from . import gem_mlsum
+from . import e2e_nlg_cleaned
 from . import gem_asset_turk
-# from . import e2e_nlg_cleaned
 ########################################
 # Translation tasks
@@ -127,7 +127,7 @@ TASK_REGISTRY = {
     # Science related
     "pubmedqa": pubmedqa.Pubmed_QA,
     "sciq": sciq.SciQ,
-    # "e2e_nlg_cleaned": e2e_nlg_cleaned.E2E_NLG_Cleaned,
+    "e2e_nlg_cleaned": e2e_nlg_cleaned.E2E_NLG_Cleaned,
     "qasper": qasper.QASPER,
     "qa4mre_2011": qa4mre.QA4MRE_2011,
     "qa4mre_2012": qa4mre.QA4MRE_2012,
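With the import restored and the registry entry uncommented, the task becomes addressable by its string name. A minimal sketch of that lookup follows (not part of this diff); how the harness actually instantiates the class, e.g. which PromptSource template it passes, happens elsewhere in the library and is not shown here.

# Minimal sketch: look up the newly registered task class by name.
# Nothing below is part of the PR; it only assumes the package is importable.
from lm_eval.tasks import TASK_REGISTRY

task_cls = TASK_REGISTRY["e2e_nlg_cleaned"]
print(task_cls.__name__)      # -> E2E_NLG_Cleaned
print(task_cls.DATASET_PATH)  # -> "e2e_nlg_cleaned" (HuggingFace datasets identifier)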
"""
Semantic Noise Matters for Neural Natural Language Generation
http://arxiv.org/abs/1911.03905
A cleaned version of the dataset from the E2E NLG Challenge.
The dataset contains MR with restaurant attributes and corresponding descriptions.
Homepage: https://github.com/tuetschek/e2e-cleaning
"""
from lm_eval.base import PromptSourceTask, rf
from lm_eval import metrics


_CITATION = """
@inproceedings{dusek-etal-2019-semantic,
    title = "Semantic Noise Matters for Neural Natural Language Generation",
    author = "Du{\v{s}}ek, Ond{\v{r}}ej and
        Howcroft, David M. and
        Rieser, Verena",
    booktitle = "Proceedings of the 12th International Conference on Natural Language Generation",
    year = "2019",
    address = "Tokyo, Japan",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-8652",
    doi = "10.18653/v1/W19-8652",
    pages = "421--426",
}
"""
# Work in progress
class E2E_NLG_Cleaned(PromptSourceTask):
    VERSION = 0
    DATASET_PATH = "e2e_nlg_cleaned"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def training_docs(self):
        if self.has_training_docs():
            # We cache training documents in `self._training_docs` for faster
            # few-shot processing. If the data is too large to fit in memory,
            # return the training data as a generator instead of a list.
            if self._training_docs is None:
                self._training_docs = list(self.dataset["train"])
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            return self.dataset["validation"]

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["test"]

    def max_generation_length(self):
        return 64

    # def stopping_criteria(self):
    #     return '\n\n'

    def invalid_doc_for_prompt(self, doc) -> bool:
        """The QA prompts are not applicable to every example; we want to filter those out."""
        return self.prompt.name.endswith("_qa") or self.prompt.name == "family_friendly_yes_no"

    def doc_to_text(self, doc) -> str:
        # If the response is not defined in PromptSource, the result of `apply` is a
        # single-element list containing an empty string.
        text = self.prompt.apply(doc)[0]
        return text

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the natural
            language description, the few-shot examples, and the question part of the
            document for `doc`.
        """
        _requests = []
        # NOTE: In the future, target will be a list of strings.
        request_args = {
            "stopping_criteria": self.stopping_criteria(),
            "max_generation_length": self.max_generation_length(),
        }
        # Skip examples for which the templates are not applicable.
        if ctx != "":
            cont_request = rf.greedy_until(ctx, request_args)
            _requests.append(cont_request)
        return _requests
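For context on what the class consumes, the sketch below (not part of the PR) loads the HuggingFace e2e_nlg_cleaned dataset and applies PromptSource templates the way doc_to_text does, skipping the same prompts that invalid_doc_for_prompt filters out. It assumes the datasets and promptsource packages are installed; the template names are whatever PromptSource ships for this dataset, none are hard-coded here.

# Rough sketch of the doc -> prompt -> input-text flow used by the task above.
# Assumes `datasets` and `promptsource` are installed; not part of this PR.
from datasets import load_dataset
from promptsource.templates import DatasetTemplates

dataset = load_dataset("e2e_nlg_cleaned", split="validation")
templates = DatasetTemplates("e2e_nlg_cleaned")

doc = dataset[0]  # one meaning-representation / reference pair
for name in templates.all_template_names:
    # Mirror `invalid_doc_for_prompt`: QA-style prompts do not apply to every example.
    if name.endswith("_qa") or name == "family_friendly_yes_no":
        continue
    applied = templates[name].apply(doc)
    text = applied[0]  # the input side, i.e. what `doc_to_text` returns
    print(f"{name}: {text[:80]}")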
setup.py
@@ -29,7 +29,7 @@ setuptools.setup(
"click>=7.1",
"scikit-learn>=0.24.1",
"torch>=1.7",
"transformers>=4.1",
"transformers>=4.16",
"sqlitedict==1.6.0",
"pytablewriter==0.58.0",
"sacrebleu==1.5.0",