"test/vscode:/vscode.git/clone" did not exist on "c2b22d6129b41978d4584cdd34509da8e512c0f4"
Unverified Commit 7614a8f3 authored by Leo Gao's avatar Leo Gao Committed by GitHub
Browse files

Merge pull request #139 from jeffhsu3/pubmedqa

QA4MRE Task
parents e9e5d0a2 5960874b
......@@ -20,6 +20,7 @@ from . import triviaqa
from . import pubmedqa
from . import sciq
from . import webqs
from . import qa4mre
TASK_REGISTRY = {
......@@ -48,8 +49,13 @@ TASK_REGISTRY = {
"lambada": lambada.LAMBADA,
"piqa": piqa.PiQA,
# Science related
"pubmedqa" : pubmedqa.Pubmed_QA,
"sciq" : sciq.SciQ,
#"qa4mre" : qa4mre.QA4MRE,
"qa4mre_2011" : qa4mre.QA4MRE_2011,
"qa4mre_2012" : qa4mre.QA4MRE_2012,
"qa4mre_2013" : qa4mre.QA4MRE_2013,
#"triviaqa": triviaqa.TriviaQA,
"arc_easy": arc.ARCEasy,
......
import os
import numpy as np
from best_download import download_file
from lm_eval.base import MultipleChoiceTask, rf, mean
import xml.etree.ElementTree as ET
import random
class QA4MRE(MultipleChoiceTask):
    """Base class for the QA4MRE (CLEF) multiple-choice reading-evaluation tasks.

    Each subclass pins ``YEAR`` to one of the 2011/2012/2013 releases. The
    year's gold-standard XML file is downloaded into ``data/qa4mre/`` and
    parsed into docs of the form {query, choices, gold, source}.
    """

    YEAR = None  # set by the year-specific subclasses

    def download(self):
        """Fetch the gold-standard XML for ``self.YEAR``, unless already cached."""
        year = self.YEAR
        lang = "EN"
        base_path = (
            "http://nlp.uned.es/clef-qa/repository/js/scripts/downloadFile.php?"
            "file=/var/www/html/nlp/clef-qa/repository/resources/QA4MRE/"
        )
        # TODO: add side tasks?
        variable_year_path = {
            2011: '2011/Training_Data/Goldstandard/',
            2012: '2012/Main_Task/Training_Data/Goldstandard/Used_in_Evaluation/',
            2013: '2013/Main_Task/Training_Data/Goldstandard/',
        }
        sha256sums = {
            2011: "6d2524952a3a015f2a82df785b85b5578681e3602ec276b4e72c01f4ebc50034",
            2012: "f9edaf408f8ac93f89a643a0d0b19263a1bb5ce64f19b2af10df279a656dfb24",
            2013: "c60e5aa4ec77e0493ef0b11d46bd1d74d58a499a3a2f871b8cf3af9536f0f094",
        }
        url_path = f"{base_path}{variable_year_path[year]}QA4MRE-{year}-{lang}_GS.xml"
        target = f"data/qa4mre/QA4MRE-{year}-{lang}_GS.xml"
        # makedirs also creates a missing parent "data/" directory; the
        # previous plain mkdir raised FileNotFoundError in that case.
        os.makedirs("data/qa4mre", exist_ok=True)
        # Fix: the old cache check tested a path without the "_GS.xml" suffix,
        # which never exists, so the file was re-downloaded on every run.
        if not os.path.isfile(target):
            download_file(
                url_path,
                target,
                checksum=sha256sums[year],
            )

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return False

    def has_test_docs(self):
        return True

    def fewshot_examples(self, k):
        # Only a test split exists, so few-shot examples are sampled from it.
        if self._training_docs is None:
            self._training_docs = list(self.test_docs())
        return random.sample(self._training_docs, k)

    def _convert_standard(self, question):
        """Convert one ``<q>`` XML element into the standard doc dict."""
        choices = [answer.text for answer in question.iter('answer')]
        out_doc = {
            "query": question.find('q_str').text,
            "choices": choices,
            # a_id is 1-based in the XML; "gold" is the 0-based choice index.
            "gold": int(question.find("./answer[@correct='Yes']").attrib["a_id"]) - 1,
        }
        return out_doc

    def load_docs(self, textfilename, tfds=False):
        """Yield one doc per question in the gold-standard XML file.

        ``tfds`` is accepted for interface compatibility but is unused.
        """
        root = ET.parse(textfilename).getroot()
        # TODO: the source passage is often much larger than the model context;
        # at the moment it just gets left-truncated by the LM automatically,
        # and maybe that's good enough?
        for reading_test in root.iter('reading-test'):
            src = reading_test[0].text.strip().replace("\'", "'")
            for question in reading_test.iter('q'):
                out_doc = self._convert_standard(question)
                out_doc['source'] = src
                yield out_doc

    def fewshot_description(self):
        return ""

    def test_docs(self):
        return self.load_docs(f"data/qa4mre/QA4MRE-{self.YEAR}-EN_GS.xml")

    def doc_to_text(self, doc):
        return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"])
class QA4MRE_2011(QA4MRE):
    # QA4MRE main task, 2011 gold-standard release.
    YEAR = 2011
class QA4MRE_2012(QA4MRE):
    # QA4MRE main task, 2012 gold-standard release.
    YEAR = 2012
class QA4MRE_2013(QA4MRE):
    # QA4MRE main task, 2013 gold-standard release.
    YEAR = 2013
......@@ -3,6 +3,7 @@ import json
from ..utils import sh
from lm_eval.base import MultipleChoiceTask, rf, mean
import zipfile
from best_download import download_file
class SciQ(MultipleChoiceTask):
......@@ -10,9 +11,11 @@ class SciQ(MultipleChoiceTask):
def download(self):
    """Fetch the SciQ zip archive (checksum-verified) and unpack it into data/sciq/."""
    # makedirs also creates a missing parent "data/" directory and is a no-op
    # when the target already exists; the old exists()+mkdir pair raised when
    # "data/" itself was absent.
    os.makedirs('data/sciq', exist_ok=True)
    download_file(
        'https://ai2-public-datasets.s3.amazonaws.com/sciq/SciQ.zip',
        'data/sciq/SciQ.zip',
        '7f3312f6ac6b09970b32942d106a8c44ec0dad46a0369f17d635aff8e348a87c',
    )
    with zipfile.ZipFile("data/sciq/SciQ.zip", "r") as zf:
        zf.extractall("data/sciq/")
......@@ -48,8 +51,6 @@ class SciQ(MultipleChoiceTask):
yield self._convert_standard(record)
def fewshot_description(self):
    """Return an empty task description (no prompt preamble for SciQ)."""
    # Average ctx length in labelled dataset is 238.9
    # 2 few-shot examples pushes it beyond context window
    return ""
def training_docs(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment