Commit 80f3e064 authored by jon-tow's avatar jon-tow
Browse files

Add `SWAG`

parent 6caa0afd
......@@ -15,6 +15,7 @@ from . import wsc273
from . import winogrande
from . import quac
from . import hellaswag
from . import swag
from . import openbookqa
from . import squad
from . import naturalqs
......@@ -136,6 +137,7 @@ TASK_REGISTRY = {
# "quac": quac.QuAC, # not implemented yet
"logiqa": logiqa.LogiQA,
"hellaswag": hellaswag.HellaSwag,
"swag": swag.SWAG,
"openbookqa": openbookqa.OpenBookQA,
"squad2": squad.SQuAD2,
"race": race.RACE,
......
"""
SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference
https://arxiv.org/pdf/1808.05326.pdf
SWAG (Situations With Adversarial Generations) is an adversarial dataset
that consists of 113k multiple choice questions about grounded situations. Each
question is a video caption from LSMDC or ActivityNet Captions, with four answer
choices about what might happen next in the scene. The correct answer is the
(real) video caption for the next event in the video; the three incorrect
answers are adversarially generated and human verified, so as to fool machines
but not humans.
Homepage: https://rowanzellers.com/swag/
"""
from lm_eval.base import MultipleChoiceTask
_CITATION = """
@inproceedings{zellers2018swagaf,
title={SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference},
author={Zellers, Rowan and Bisk, Yonatan and Schwartz, Roy and Choi, Yejin},
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
year={2018}
}
"""
class SWAG(MultipleChoiceTask):
VERSION = 0
DATASET_PATH = "swag"
DATASET_NAME = "regular"
def has_training_docs(self):
return True
def has_validation_docs(self):
return True
def has_test_docs(self):
return False
def training_docs(self):
if self._training_docs is None:
self._training_docs = list(map(self._process_doc, self.dataset["train"]))
return self._training_docs
def validation_docs(self):
return map(self._process_doc, self.dataset["validation"])
def _process_doc(self, doc):
out_doc = {
"query": doc["startphrase"],
"choices": [doc["ending0"], doc["ending1"], doc["ending2"], doc["ending3"]],
"gold": int(doc["label"]),
}
return out_doc
def doc_to_text(self, doc):
return doc["query"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment