Commit 346e2c22 authored by jon-tow, committed by guac

feat(tasks): Add SIQA

parent df3da98c
@@ -23,6 +23,7 @@ from . import naturalqs
from . import sat
from . import arithmetic
from . import lambada
from . import siqa
from . import piqa
from . import prost
from . import mc_taco
@@ -123,6 +124,7 @@ TASK_REGISTRY = {
"lambada_standard": lambada.LambadaStandard,
"lambada_openai_cloze": lambada_cloze.LambadaOpenAICloze,
"lambada_standard_cloze": lambada_cloze.LambadaStandardCloze,
"siqa": siqa.SIQA,
# multilingual lambada
**lambada_multilingual.construct_tasks(),
"wikitext": wikitext.WikiText,
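With the import and the TASK_REGISTRY entry above, the new task becomes selectable by the name "siqa". A minimal sketch of resolving it programmatically, assuming the registry-lookup helper get_task_dict that already ships with the harness (it is not part of this commit):

from lm_eval import tasks

# Resolve the newly registered task by name; returns {"siqa": <SIQA instance>}.
task_dict = tasks.get_task_dict(["siqa"])
siqa_task = task_dict["siqa"]

The task implementation itself is added in a new module (the target of the relative import above), shown below.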
"""
SOCIAL IQA: Commonsense Reasoning about Social Interactions
https://aclanthology.org/D19-1454.pdf
Social IQa (Social Interaction QA) is a question-answering benchmark for testing
social commonsense intelligence. Unlike many prior benchmarks that focus on
physical or taxonomic knowledge, Social IQa focuses on reasoning about people’s
actions and their social implications. For example, given an action like "Jesse
saw a concert" and a question like "Why did Jesse do this?", humans can easily
infer that Jesse wanted "to see their favorite performer" or "to enjoy the music",
and not "to see what's happening inside" or "to see if it works". The actions in Social IQa
span a wide variety of social situations, and answer candidates contain both human-curated
answers and adversarially-filtered machine-generated candidates.
Social IQa contains over 37,000 QA pairs for evaluating models’ abilities to reason
about the social implications of everyday events and situations.
Homepage: https://leaderboard.allenai.org/socialiqa/submissions/get-started
"""
from lm_eval.base import MultipleChoiceTask


_CITATION = """
@inproceedings{Sap2019SocialIC,
    title={Social IQA: Commonsense Reasoning about Social Interactions},
    author={Maarten Sap and Hannah Rashkin and Derek Chen and Ronan Le Bras and Yejin Choi},
    booktitle={Conference on Empirical Methods in Natural Language Processing},
    year={2019}
}
"""


class SIQA(MultipleChoiceTask):
    VERSION = 0
    DATASET_PATH = "social_i_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        if self.has_training_docs():
            if self._training_docs is None:
                self._training_docs = list(
                    map(self._process_doc, self.dataset["train"])
                )
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            return map(self._process_doc, self.dataset["validation"])

    def _process_doc(self, doc):
        return {
            "query": f"{doc['context']}\nQuestion: {doc['question']}",
            "choices": [doc['answerA'], doc['answerB'], doc['answerC']],
            "gold": int(doc['label']) - 1,  # `-1` because the labels are 1-indexed.
        }

    def doc_to_text(self, doc):
        return doc["query"] + "\nAnswer:"