Commit a264f449 authored by jeffhsu3
Browse files

removed truncation of source

parent 18e08d29
...@@ -70,8 +70,4 @@ class Pubmed_QA(HFTask): ...@@ -70,8 +70,4 @@ class Pubmed_QA(HFTask):
def higher_is_better(self): def higher_is_better(self):
return { return {
"acc" : True "acc" : True
<<<<<<< HEAD
} }
=======
}
>>>>>>> 79878d135a19846868c182aa986b6ec740e7c884
...@@ -27,9 +27,6 @@ class QA4MRE(MultipleChoiceTask): ...@@ -27,9 +27,6 @@ class QA4MRE(MultipleChoiceTask):
} }
vpath = variable_year_path[year] vpath = variable_year_path[year]
url_path = f"{base_path}{vpath}QA4MRE-{year}-{lang}_GS.xml" url_path = f"{base_path}{vpath}QA4MRE-{year}-{lang}_GS.xml"
# Should all the years be concatenated together?
# Separate let's us compare with results from the competition
# Competition also separated out by topics
if not os.path.exists("data/qa4mre"): if not os.path.exists("data/qa4mre"):
os.mkdir("data/qa4mre") os.mkdir("data/qa4mre")
if not os.path.isfile(f"data/qa4mre/QA4MRE-{year}-{lang}"): if not os.path.isfile(f"data/qa4mre/QA4MRE-{year}-{lang}"):
...@@ -59,7 +56,7 @@ class QA4MRE(MultipleChoiceTask): ...@@ -59,7 +56,7 @@ class QA4MRE(MultipleChoiceTask):
out_doc = { out_doc = {
"query" : question.find('q_str').text, "query" : question.find('q_str').text,
"choices": choices, "choices": choices,
"gold" : int(question.find("./answer[@correct='Yes']").attrib["a_id"])-1, "gold" : int(question.find("./answer[@correct='Yes']").attrib["a_id"]) - 1,
} }
return out_doc return out_doc
...@@ -67,14 +64,12 @@ class QA4MRE(MultipleChoiceTask): ...@@ -67,14 +64,12 @@ class QA4MRE(MultipleChoiceTask):
tree = ET.parse(textfilename) tree = ET.parse(textfilename)
root = tree.getroot() root = tree.getroot()
# TODO: context is much larger than the context sometimes # TODO: context is much larger than the context sometimes
TRUNCATE = 4000
# Multiple questions per document
for reading_test in root.iter('reading-test'): for reading_test in root.iter('reading-test'):
src = reading_test[0].text src = reading_test[0].text
src = src.rstrip("\n\t\t\t").replace("\'", "'") src = src.rstrip("\n\t\t\t").replace("\'", "'")
for qid, question in enumerate(reading_test.iter('q')): for qid, question in enumerate(reading_test.iter('q')):
out_doc = self._convert_standard(question) out_doc = self._convert_standard(question)
out_doc['source'] = src[:TRUNCATE] out_doc['source'] = src
yield out_doc yield out_doc
def fewshot_description(self): def fewshot_description(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment