# qa4mre.py

"""
QA4MRE 2011-2013: Overview of Question Answering for Machine Reading Evaluation
https://www.cs.cmu.edu/~./hovy/papers/13CLEF-QA4MRE.pdf

The (English only) QA4MRE challenge was run as a Lab at CLEF 2011-2013.
The main objective of this exercise is to develop a methodology for evaluating 
Machine Reading systems through Question Answering and Reading Comprehension 
Tests. Systems should be able to extract knowledge from large volumes of text 
and use this knowledge to answer questions. Four different tasks have been
organized during these years: Main Task, Processing Modality and Negation for
Machine Reading, Machine Reading of Biomedical Texts about Alzheimer's disease,
and Entrance Exam.

Homepage: http://nlp.uned.es/clef-qa/repository/qa4mre.php
"""
import os
import xml.etree.ElementTree as ET
from best_download import download_file
from lm_eval.base import MultipleChoiceTask


# BibTeX entry for the QA4MRE overview paper.
# Raw string so the LaTeX accent commands (\~ and \') keep their backslashes;
# in a regular string literal \' collapses to a plain apostrophe and \~ is an
# invalid escape sequence.
_CITATION = r"""
@inproceedings{Peas2013QA4MRE2O,
    title={QA4MRE 2011-2013: Overview of Question Answering for Machine Reading Evaluation},
    author={Anselmo Pe{\~n}as and Eduard H. Hovy and Pamela Forner and {\'A}lvaro Rodrigo and Richard F. E. Sutcliffe and Roser Morante},
    booktitle={CLEF},
    year={2013}
}
"""


class QA4MRE(MultipleChoiceTask):
    """Base task for the English QA4MRE main-task gold-standard sets.

    Concrete subclasses set ``YEAR`` to select which edition's gold-standard
    XML file is downloaded and parsed. Only test documents are exposed; each
    document is one multiple-choice question together with the text of the
    reading test it belongs to.
    """

    VERSION = 0
    # Edition year; the base class leaves it unset and subclasses override it.
    YEAR = None

    def _local_path(self, lang="EN"):
        """Return the local path of the gold-standard XML file for ``YEAR``."""
        return f"data/qa4mre/QA4MRE-{self.YEAR}-{lang}_GS.xml"

    def download(self):
        """Download the gold-standard XML for ``YEAR`` unless already on disk.

        Raises:
            KeyError: if ``YEAR`` has no entry in the per-year path table.
        """
        year = self.YEAR
        lang = "EN"
        base_path = (
            "http://nlp.uned.es/clef-qa/repository/js/scripts/downloadFile.php?"
            "file=/var/www/html/nlp/clef-qa/repository/resources/QA4MRE/"
        )
        # TODO: add side tasks?
        # Server-side directory of the gold-standard file for each edition.
        variable_year_path = {
            2011: '2011/Training_Data/Goldstandard/',
            2012: '2012/Main_Task/Training_Data/Goldstandard/Used_in_Evaluation/',
            2013: '2013/Main_Task/Training_Data/Goldstandard/',
        }
        # SHA-256 digests handed to download_file as the expected checksum.
        sha256sums = {
            2011: "6d2524952a3a015f2a82df785b85b5578681e3602ec276b4e72c01f4ebc50034",
            2012: "f9edaf408f8ac93f89a643a0d0b19263a1bb5ce64f19b2af10df279a656dfb24",
            2013: "c60e5aa4ec77e0493ef0b11d46bd1d74d58a499a3a2f871b8cf3af9536f0f094",
        }
        vpath = variable_year_path[year]
        url_path = f"{base_path}{vpath}QA4MRE-{year}-{lang}_GS.xml"
        local_file = self._local_path(lang)
        os.makedirs(os.path.dirname(local_file), exist_ok=True)
        # Test for the exact file that is written below. Checking a path
        # without the "_GS.xml" suffix would never match, so the download
        # would be repeated on every call.
        if not os.path.isfile(local_file):
            download_file(
                url_path,
                local_file=local_file,
                expected_checksum=sha256sums[year],
            )

    def has_training_docs(self):
        """Return False: no training split is provided."""
        return False

    def has_validation_docs(self):
        """Return False: no validation split is provided."""
        return False

    def has_test_docs(self):
        """Return True: the gold-standard file is served as the test split."""
        return True

    def _convert_standard(self, question):
        """Convert one ``q`` XML element into a multiple-choice document.

        Args:
            question: ElementTree element containing a ``q_str`` child and
                ``answer`` elements, one of which has ``correct='Yes'``.

        Returns:
            A dict with the question text under ``"query"``, the answer texts
            under ``"choices"`` and the index of the correct one under
            ``"gold"``.

        Raises:
            AttributeError: if no answer is marked ``correct='Yes'`` or the
                ``q_str`` child is missing (``find`` returns ``None``).
        """
        choices = [answer.text for answer in question.iter('answer')]
        correct = question.find("./answer[@correct='Yes']")
        return {
            "query": question.find('q_str').text,
            "choices": choices,
            # Presumably a_id values are 1-based and follow document order, so
            # a_id - 1 indexes into `choices` -- TODO confirm against the data.
            "gold": int(correct.attrib["a_id"]) - 1,
        }

    def load_docs(self, textfilename, tfds=False):
        """Yield one document per question found in a gold-standard XML file.

        Args:
            textfilename: path of the XML file to parse.
            tfds: unused; kept so existing callers keep working.

        Yields:
            The dict built by ``_convert_standard`` with an extra ``"source"``
            key holding the stripped text of the reading test's first child.
        """
        root = ET.parse(textfilename).getroot()
        # TODO: the source text is sometimes much longer than the LM context;
        # at the moment, it just gets left-truncated by LM automatically, and
        # maybe that's good enough?
        for reading_test in root.iter('reading-test'):
            # NOTE(review): "\'" in a Python literal is just "'", so this
            # replace() has no effect. It presumably meant to unescape
            # backslash-apostrophes; left as-is to keep prompts unchanged.
            src = reading_test[0].text.strip().replace("\'", "'")
            for question in reading_test.iter('q'):
                out_doc = self._convert_standard(question)
                out_doc['source'] = src
                yield out_doc

    def test_docs(self):
        """Return a generator over the documents of the downloaded file."""
        return self.load_docs(self._local_path())

    def doc_to_text(self, doc):
        """Build the prompt: source text, then the question, then ``Answer:``."""
        return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"])

class QA4MRE_2011(QA4MRE):
    """QA4MRE task bound to the 2011 edition (sets ``YEAR`` to 2011)."""
    YEAR = 2011

class QA4MRE_2012(QA4MRE):
    """QA4MRE task bound to the 2012 edition (sets ``YEAR`` to 2012)."""
    YEAR = 2012

class QA4MRE_2013(QA4MRE):
    """QA4MRE task bound to the 2013 edition (sets ``YEAR`` to 2013)."""
    YEAR = 2013