# siqa.py

"""
SOCIAL IQA: Commonsense Reasoning about Social Interactions
https://aclanthology.org/D19-1454.pdf

Social IQa (Social Interaction QA) is a question-answering benchmark for testing
social commonsense intelligence. Contrary to many prior benchmarks that focus on
physical or taxonomic knowledge, Social IQa focuses on reasoning about people’s
actions and their social implications. For example, given an action like "Jesse
saw a concert" and a question like "Why did Jesse do this?", humans can easily
infer that Jesse wanted "to see their favorite performer" or "to enjoy the music",
and not "to see what's happening inside" or "to see if it works". The actions in Social IQa
span a wide variety of social situations, and answer candidates contain both human-curated
answers and adversarially-filtered machine-generated candidates.
Social IQa contains over 37,000 QA pairs for evaluating models’ abilities to reason
about the social implications of everyday events and situations.

Homepage: https://leaderboard.allenai.org/socialiqa/submissions/get-started
"""
from lm_eval.base import MultipleChoiceTask


_CITATION = """
@inproceedings{Sap2019SocialIC,
  title={Social IQA: Commonsense Reasoning about Social Interactions},
  author={Maarten Sap and Hannah Rashkin and Derek Chen and Ronan Le Bras and Yejin Choi},
  booktitle={Conference on Empirical Methods in Natural Language Processing},
  year={2019}
}
"""


class SIQA(MultipleChoiceTask):
    """Social IQa multiple-choice task.

    ``DATASET_PATH`` names the ``social_i_qa`` dataset. Every raw record is
    converted by ``_process_doc`` into a dict holding a ``query`` (context
    followed by the question), the three answer ``choices`` and a zero-based
    ``gold`` index. Training and validation splits are reported as available;
    the test split is not.
    """

    VERSION = 0
    DATASET_PATH = "social_i_qa"
    DATASET_NAME = None

    def has_training_docs(self):
        """Report that a training split is available."""
        return True

    def has_validation_docs(self):
        """Report that a validation split is available."""
        return True

    def has_test_docs(self):
        """Report that no test split is exposed by this task."""
        return False

    def training_docs(self):
        """Return the processed training documents as a list.

        The list is built on the first call and stored on
        ``self._training_docs`` so later calls reuse it.
        NOTE(review): ``self._training_docs`` is presumably initialised to
        ``None`` by the base class -- confirm.
        """
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            # Bind the converter before touching the dataset, then
            # materialise the whole split once.
            convert = self._process_doc
            self._training_docs = [convert(raw) for raw in self.dataset["train"]]
        return self._training_docs

    def validation_docs(self):
        """Return a lazy iterator over the processed validation documents."""
        if not self.has_validation_docs():
            return None
        convert = self._process_doc
        return map(convert, self.dataset["validation"])

    def _process_doc(self, doc):
        """Convert one raw record into the ``query``/``choices``/``gold`` form."""
        context, question = doc["context"], doc["question"]
        answers = [doc[key] for key in ("answerA", "answerB", "answerC")]
        # Raw labels are 1-indexed, so shift them down to a 0-based index.
        gold_index = int(doc["label"]) - 1
        return {
            "query": f"{context} {question}",
            "choices": answers,
            "gold": gold_index,
        }

    def doc_to_text(self, doc):
        """Return the prompt text for a processed document (its ``query``)."""
        prompt = doc["query"]
        return prompt