Add `SWAG`

80f3e064 · jon-tow · 6caa0afd · 80f3e064 · 80f3e064
Commit 80f3e064 authored Apr 12, 2022 by jon-tow
Hide whitespace changes
Inline Side-by-side

Showing with 61 additions and 0 deletions

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +2 -0

lm_eval/tasks/swag.py lm_eval/tasks/swag.py +59 -0

No files found.
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -15,6 +15,7 @@ from . import wsc273
 from . import winogrande
 from . import quac
 from . import hellaswag
+from . import swag
 from . import openbookqa
 from . import squad
 from . import naturalqs
@@ -136,6 +137,7 @@ TASK_REGISTRY = {
    # "quac": quac.QuAC, # not implemented yet
    "logiqa": logiqa.LogiQA,
    "hellaswag": hellaswag.HellaSwag,
+    "swag": swag.SWAG,
    "openbookqa": openbookqa.OpenBookQA,
    "squad2": squad.SQuAD2,
    "race": race.RACE,

--- a/lm_eval/tasks/swag.py
+++ b/lm_eval/tasks/swag.py
+"""
+SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference
+https://arxiv.org/pdf/1808.05326.pdf
+
+SWAG (Situations With Adversarial Generations) is an adversarial dataset
+that consists of 113k multiple choice questions about grounded situations. Each
+question is a video caption from LSMDC or ActivityNet Captions, with four answer
+choices about what might happen next in the scene. The correct answer is the
+(real) video caption for the next event in the video; the three incorrect
+answers are adversarially generated and human verified, so as to fool machines
+but not humans.
+
+Homepage: https://rowanzellers.com/swag/
+"""
+from lm_eval.base import MultipleChoiceTask
+
+
+# BibTeX entry for the SWAG paper (Zellers et al., EMNLP 2018).
+_CITATION = """
+@inproceedings{zellers2018swagaf,
+    title={SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference},
+    author={Zellers, Rowan and Bisk, Yonatan and Schwartz, Roy and Choi, Yejin},
+    booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
+    year={2018}
+}
+"""
+
+
+class SWAG(MultipleChoiceTask):
+    """Multiple-choice task built on the SWAG dataset.
+
+    Each processed document is a dict with a ``query`` (the start phrase),
+    four ``choices`` (the candidate endings) and ``gold`` (the integer label;
+    presumably the index of the correct ending in ``choices`` -- confirm
+    against the dataset schema).
+    """
+    VERSION = 0
+    # Dataset identifiers read by the base class.
+    # NOTE(review): presumably the dataset path and configuration name handed
+    # to the dataset loader -- confirm in lm_eval.base.
+    DATASET_PATH = "swag"
+    DATASET_NAME = "regular"
+
+    def has_training_docs(self):
+        """Return True: this task provides training documents."""
+        return True
+
+    def has_validation_docs(self):
+        """Return True: this task provides validation documents."""
+        return True
+
+    def has_test_docs(self):
+        """Return False: this task does not expose test documents."""
+        return False
+
+    def training_docs(self):
+        """Return the processed training documents as a list, cached after the first call."""
+        # NOTE(review): assumes the base class initialises `_training_docs`
+        # to None and populates `self.dataset` -- confirm in lm_eval.base.
+        if self._training_docs is None:
+            self._training_docs = list(map(self._process_doc, self.dataset["train"]))
+        return self._training_docs
+
+    def validation_docs(self):
+        """Return a lazy iterator of processed validation documents.
+
+        Unlike ``training_docs`` the result is not cached; every call builds
+        a fresh ``map`` object that can be consumed only once.
+        """
+        return map(self._process_doc, self.dataset["validation"])
+
+    def _process_doc(self, doc):
+        """Convert a raw dataset record into the query/choices/gold layout.
+
+        Reads ``startphrase``, ``ending0`` .. ``ending3`` and ``label`` from
+        ``doc``; ``label`` is coerced to ``int``.
+        """
+        out_doc = {
+            "query": doc["startphrase"],
+            "choices": [doc["ending0"], doc["ending1"], doc["ending2"], doc["ending3"]],
+            "gold": int(doc["label"]),
+        }
+        return out_doc
+
+    def doc_to_text(self, doc):
+        """Return the prompt text for a processed document: its ``query`` field."""
+        return doc["query"]