Merge pull request #7 from bigscience-workshop/kkawamu1/gem_xsum

Add GEM/xsum

Merge pull request #7 from bigscience-workshop/kkawamu1/gem_xsum
Add GEM/xsum
6e56cd0d · Charles Lovering · GitHub · fce17ee1 · a1b271dd · 6e56cd0d
Unverified Commit 6e56cd0d authored Apr 27, 2022 by Charles Lovering Committed by GitHub Apr 27, 2022
Show whitespace changes
Inline Side-by-side

Showing with 163 additions and 0 deletions

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +10 -0

lm_eval/tasks/gem_xsum.py lm_eval/tasks/gem_xsum.py +153 -0

No files found.
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -54,6 +54,7 @@ from . import gsm8k
 from . import storycloze
 from . import hans
 from . import gem_webnlg
+from . import gem_xsum
 # from . import e2e_nlg_cleaned
@@ -291,6 +292,15 @@ TASK_REGISTRY = {
    # "storycloze_2016": storycloze.StoryCloze2016,
    # "storycloze_2018": storycloze.StoryCloze2018,
    # "sat": sat.SATAnalogies,
+    # GEM/xsum
+    "gem_xsum": gem_xsum.GEMXSUM,
+    "gem_xsum_challenge_sample": gem_xsum.GEMXSUMChallgeSample,
+    "gem_xsum_challenge_test_backtranslation": gem_xsum.GEMXSUMChallgeTestBacktranslation,
+    "gem_xsum_challenge_test_bfp_02": gem_xsum.GEMXSUMChallgeTestBFP02,
+    "gem_xsum_challenge_test_bfp_05": gem_xsum.GEMXSUMChallgeTestBFP05,
+    "gem_xsum_challenge_test_nopunc": gem_xsum.GEMXSUMChallgeTestNopunc,
+    "gem_xsum_challenge_test_covid": gem_xsum.GEMXSUMChallgeTestCovid,
 }

--- a/lm_eval/tasks/gem_xsum.py
+++ b/lm_eval/tasks/gem_xsum.py
+"""
+Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization
+https://arxiv.org/pdf/1808.08745.pdf
+The dataset is for the task of abstractive summarization in its extreme form: summarizing a document in a single sentence. It introduces extreme summarization, a new single-document summarization task which does not favor extractive strategies and calls for an abstractive modeling approach. The idea is to create a short, one-sentence news summary answering the question "What is the article about?".
+This task specifically uses the version of the dataset that is part of the GEM benchmark.
+Homepage: https://github.com/EdinburghNLP/XSum
+The GEM Benchmark: Natural Language Generation, its Evaluation and Metrics
+https://arxiv.org/pdf/2102.01672v3.pdf
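+The tasks in this module cover the standard train/validation/test splits as well as the challenge splits: sampled train/validation subsets and the backtranslation, bfp_02, bfp_05, nopunc, and covid test sets.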
+Homepage: https://gem-benchmark.com/data_cards/XSum
+"""
+from lm_eval.base import PromptSourceTask
+from lm_eval.base import Task, rf
+_CITATION = """
+@InProceedings{xsum-emnlp,
+  author =      "Shashi Narayan and Shay B. Cohen and Mirella Lapata",
+  title =       "Don't Give Me the Details, Just the Summary! {T}opic-Aware Convolutional Neural Networks for Extreme Summarization",
+  booktitle =   "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing ",
+  year =        "2018",
+  address =     "Brussels, Belgium",
+}
+"""
+class GEMXSUMBase(PromptSourceTask):
+    VERSION = 0
+    DATASET_PATH = "GEM/xsum"
+    DATASET_NAME = None
+    SPLIT = None
+    def has_training_docs(self):
+        return True
+    def has_validation_docs(self):
+        return True
+    def has_test_docs(self):
+        return True
+    def stopping_criteria(self):
+        return '.'
+    def training_docs(self):
+        if self.has_training_docs():
+            # We cache training documents in `self._training_docs` for faster
+            # few-shot processing. If the data is too large to fit in memory,
+            # return the training data as a generator instead of a list.
+            if self._training_docs is None:
+                self._training_docs = list(self.dataset["train"])
+            return self._training_docs
+    def validation_docs(self):
+        if self.has_validation_docs():
+            return self.dataset["validation"]
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset["test"]
+class GEMXSUM(GEMXSUMBase):
+    '''this is for train/validation/test'''
+    SPLIT = ''
+class GEMXSUMChallgeSample(GEMXSUMBase):
+    '''this is for challenge_train_sample/challenge_validation_sample'''
+    SPLIT = 'challenge_sample'
+    def has_test_docs(self):
+        return False
+    def training_docs(self):
+        if self.has_training_docs():
+            # We cache training documents in `self._training_docs` for faster
+            # few-shot processing. If the data is too large to fit in memory,
+            # return the training data as a generator instead of a list.
+            if self._training_docs is None:
+                self._training_docs = list(self.dataset["challenge_train_sample"])
+            return self._training_docs
+    def validation_docs(self):
+        if self.has_validation_docs():
+            return self.dataset["challenge_validation_sample"]
+class GEMXSUMChallgeTestBacktranslation(GEMXSUMBase):
+    '''this is for challenge_test_backtranslation'''
+    SPLIT = 'challenge_test_backtranslation'
+    def has_training_docs(self):
+        return False
+    def has_validation_docs(self):
+        return False
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset[self.SPLIT]
+class GEMXSUMChallgeTestBFP02(GEMXSUMBase):
+    '''this is for challenge_test_bfp_02'''
+    SPLIT = 'challenge_test_bfp_02'
+    def has_training_docs(self):
+        return False
+    def has_validation_docs(self):
+        return False
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset[self.SPLIT]
+class GEMXSUMChallgeTestBFP05(GEMXSUMBase):
+    '''this is for challenge_test_bfp_05'''
+    SPLIT = 'challenge_test_bfp_05'
+    def has_training_docs(self):
+        return False
+    def has_validation_docs(self):
+        return False
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset[self.SPLIT]
+class GEMXSUMChallgeTestNopunc(GEMXSUMBase):
+    '''this is for challenge_test_nopunc'''
+    SPLIT = 'challenge_test_nopunc'
+    def has_training_docs(self):
+        return False
+    def has_validation_docs(self):
+        return False
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset[self.SPLIT]
+class GEMXSUMChallgeTestCovid(GEMXSUMBase):
+    '''this is for challenge_test_covid'''
+    SPLIT = 'challenge_test_covid'
+    def has_training_docs(self):
+        return False
+    def has_validation_docs(self):
+        return False
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset[self.SPLIT]
\ No newline at end of file