Unverified commit 827e2d51 authored by Stella Biderman, committed by GitHub

Merge pull request #306 from jon-tow/add-swag

Add `SWAG`
parents 1da3d719 9b6c45a8
...@@ -15,6 +15,7 @@ from . import wsc273
from . import winogrande
from . import quac
from . import hellaswag
from . import swag
from . import openbookqa
from . import squad
from . import naturalqs
...@@ -136,6 +137,7 @@ TASK_REGISTRY = {
    # "quac": quac.QuAC, # not implemented yet
    "logiqa": logiqa.LogiQA,
    "hellaswag": hellaswag.HellaSwag,
    "swag": swag.SWAG,
    "openbookqa": openbookqa.OpenBookQA,
    "squad2": squad.SQuAD2,
    "race": race.RACE,
......
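For context (an illustration, not part of the diff): once the entry above is in TASK_REGISTRY, the task can be looked up by name through the harness's own API. A minimal sketch, assuming network access for the Hugging Face `swag` dataset download that the harness performs on task instantiation, and using only methods that swag.SWAG (below) actually defines:

from lm_eval.tasks import TASK_REGISTRY

task = TASK_REGISTRY["swag"]()            # resolves to swag.SWAG via the registry
assert task.has_validation_docs()         # True, per swag.py below
doc = next(iter(task.validation_docs()))  # a processed doc: query/choices/gold
print(task.doc_to_text(doc))              # the "startphrase" context
print(doc["choices"], doc["gold"])        # four endings and the gold index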
"""
SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference
https://arxiv.org/pdf/1808.05326.pdf
SWAG (Situations With Adversarial Generations) is an adversarial dataset
that consists of 113k multiple choice questions about grounded situations. Each
question is a video caption from LSMDC or ActivityNet Captions, with four answer
choices about what might happen next in the scene. The correct answer is the
(real) video caption for the next event in the video; the three incorrect
answers are adversarially generated and human verified, so as to fool machines
but not humans.
Homepage: https://rowanzellers.com/swag/
"""
from lm_eval.base import MultipleChoiceTask
_CITATION = """
@inproceedings{zellers2018swagaf,
title={SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference},
author={Zellers, Rowan and Bisk, Yonatan and Schwartz, Roy and Choi, Yejin},
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
year={2018}
}
"""
class SWAG(MultipleChoiceTask):
    VERSION = 0
    DATASET_PATH = "swag"
    DATASET_NAME = "regular"

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        # Cache the processed training split so repeated calls don't re-map it.
        if self._training_docs is None:
            self._training_docs = list(map(self._process_doc, self.dataset["train"]))
        return self._training_docs

    def validation_docs(self):
        return map(self._process_doc, self.dataset["validation"])

    def _process_doc(self, doc):
        # Normalize a raw HF record into the harness's multiple-choice format:
        # a context ("query"), the four candidate endings, and the gold index.
        out_doc = {
            "query": doc["startphrase"],
            "choices": [doc["ending0"], doc["ending1"], doc["ending2"], doc["ending3"]],
            "gold": int(doc["label"]),
        }
        return out_doc

    def doc_to_text(self, doc):
        return doc["query"]
be4fcbad876124c4ba3c71970538a97fec0e36a9cc677c70b6c9243a7bcee0ec
\ No newline at end of file
{"results": {"swag": {"acc": 0.2482255323402979, "acc_norm": 0.24882535239428172, "acc_norm_stderr": 0.00305666959496067, "acc_stderr": 0.003054201832644171}}, "versions": {"swag": 0}}
\ No newline at end of file