"...lm-evaluation-harness.git" did not exist on "0b8358eca1c0ef6e0d58e1ea01cfdce5bb7350a5"
Commit 6a2b94b2 authored by ken

Add GEM/xsum

parent 54999199
@@ -53,6 +53,7 @@ from . import asdiv
from . import gsm8k
from . import storycloze
from . import hans
from . import gem_xsum
# from . import e2e_nlg_cleaned
@@ -289,6 +290,15 @@ TASK_REGISTRY = {
# "storycloze_2016": storycloze.StoryCloze2016,
# "storycloze_2018": storycloze.StoryCloze2018,
# "sat": sat.SATAnalogies,
#GEM/xum
"gem_xsum": gem_xsum.GEMXSUM,
"gem_xsum_challenge_sample": gem_xsum.GEMXSUMChallgeSample,
"gem_xsum_challenge_test_backtranslation": gem_xsum.GEMXSUMChallgeTestBacktranslation,
"gem_xsum_challenge_test_bfp_02": gem_xsum.GEMXSUMChallgeTestBFP02,
"gem_xsum_challenge_test_bfp_05": gem_xsum.GEMXSUMChallgeTestBFP05,
"gem_xsum_challenge_test_nopunc": gem_xsum.GEMXSUMChallgeTestNopunc,
"gem_xsum_challenge_test_covid": gem_xsum.GEMXSUMChallgeTestCovid,
}
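For reference, each entry above maps an evaluation task name to its implementing class; a minimal sketch of resolving one of the new names, assuming `TASK_REGISTRY` remains importable from `lm_eval.tasks` as shown in this hunk:

# Illustrative only, not part of this commit: look up a registered GEM/xsum task.
from lm_eval import tasks

task_class = tasks.TASK_REGISTRY["gem_xsum"]  # -> gem_xsum.GEMXSUM
print(task_class.DATASET_PATH)                # "GEM/xsum"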
......
"""
Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization
https://arxiv.org/pdf/1808.08745.pdf
The dataset is for the task of abstractive summarization in its extreme form: summarizing a document in a single sentence. It introduces extreme summarization, a new single-document summarization task which does not favor extractive strategies and calls for an abstractive modeling approach. The idea is to create a short, one-sentence news summary answering the question "What is the article about?".
This task uses the version of the dataset distributed as part of the GEM benchmark.
Homepage: https://github.com/EdinburghNLP/XSum
The GEM Benchmark: Natural Language Generation, its Evaluation and Metrics
https://arxiv.org/pdf/2102.01672v3.pdf
GEM data card: https://gem-benchmark.com/data_cards/XSum
"""
from lm_eval.base import PromptSourceTask
_CITATION = """
@InProceedings{xsum-emnlp,
  author = "Shashi Narayan and Shay B. Cohen and Mirella Lapata",
  title = "Don't Give Me the Details, Just the Summary! {T}opic-Aware Convolutional Neural Networks for Extreme Summarization",
  booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
  year = "2018",
  address = "Brussels, Belgium",
}
"""
class GEMXSUMBase(PromptSourceTask):
    VERSION = 0
    DATASET_PATH = "GEM/xsum"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def stopping_criteria(self):
        # Targets are single-sentence summaries, so generation stops at the first period.
        return '.'

    def training_docs(self):
        if self.has_training_docs():
            # We cache training documents in `self._training_docs` for faster
            # few-shot processing. If the data is too large to fit in memory,
            # return the training data as a generator instead of a list.
            if self._training_docs is None:
                self._training_docs = list(self.dataset["train"])
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            return self.dataset["validation"]

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["test"]
class GEMXSUM(GEMXSUMBase):
    '''This is for the standard train/validation/test splits.'''


class GEMXSUMChallgeSample(GEMXSUMBase):
    '''This is for the challenge_train_sample/challenge_validation_sample splits.'''

    def has_test_docs(self):
        return False

    def training_docs(self):
        if self.has_training_docs():
            # We cache training documents in `self._training_docs` for faster
            # few-shot processing. If the data is too large to fit in memory,
            # return the training data as a generator instead of a list.
            if self._training_docs is None:
                self._training_docs = list(self.dataset["challenge_train_sample"])
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            return self.dataset["challenge_validation_sample"]


class GEMXSUMChallgeTestBacktranslation(GEMXSUMBase):
    '''This is for the challenge_test_backtranslation split.'''

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["challenge_test_backtranslation"]


class GEMXSUMChallgeTestBFP02(GEMXSUMBase):
    '''This is for the challenge_test_bfp_02 split.'''

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["challenge_test_bfp_02"]


class GEMXSUMChallgeTestBFP05(GEMXSUMBase):
    '''This is for the challenge_test_bfp_05 split.'''

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["challenge_test_bfp_05"]


class GEMXSUMChallgeTestNopunc(GEMXSUMBase):
    '''This is for the challenge_test_nopunc split.'''

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["challenge_test_nopunc"]


class GEMXSUMChallgeTestCovid(GEMXSUMBase):
    '''This is for the challenge_test_covid split.'''

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def test_docs(self):
        if self.has_test_docs():
            return self.dataset["challenge_test_covid"]
\ No newline at end of file
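For context, all of the split names read by these classes come from the GEM release of XSum on the Hugging Face Hub; a minimal sketch of inspecting them with the `datasets` library (the split names are taken from the code above, the rest is an assumption about the hosted dataset):

# Illustrative only, not part of this commit: list the GEM/xsum splits
# that the task classes above read from. Requires the `datasets` package.
from datasets import load_dataset

data = load_dataset("GEM/xsum")
print(list(data.keys()))
# Expected, per the classes above: train, validation, test,
# challenge_train_sample, challenge_validation_sample,
# challenge_test_backtranslation, challenge_test_bfp_02,
# challenge_test_bfp_05, challenge_test_nopunc, challenge_test_covid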