Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering
https://aclanthology.org/P19-1092.pdf
HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to
access a specialized position in the Spanish healthcare system, and are challenging
even for highly specialized humans.
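For context (not part of the diff): both language configs load from the Hugging Face hub. A minimal sketch, assuming the hub id `head_qa` with configs `es` (the original Spanish exam data) and `en` (a machine translation):

    from datasets import load_dataset

    # "es" is the original exam data; "en" is the translated config.
    headqa_es = load_dataset("head_qa", "es")
    headqa_en = load_dataset("head_qa", "en")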
...
...
@@ -15,7 +15,7 @@ from lm_eval.base import MultipleChoiceTask
_CITATION="""
@misc{liu2020interpretable,
title={Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering},
title={Interpretable Multi-Step Reasoning with Knowledge Extraction on Complex Healthcare Question Answering},
author={Ye Liu and Shaika Chowdhury and Chenwei Zhang and Cornelia Caragea and Philip S. Yu},
year={2020},
eprint={2008.02434},
...
...
@@ -61,6 +61,12 @@ class HeadQABase(MultipleChoiceTask):
    def doc_to_text(self, doc):
        return doc["query"]

    def should_decontaminate(self):
        return True

    def doc_to_decontamination_query(self, doc):
        return doc["query"]


class HeadQAEn(HeadQABase):
    DATASET_NAME = "en"
...
...
@@ -76,4 +82,6 @@ class HeadQAEsDeprecated(HeadQABase):
    def __init__(self):
        super().__init__()
        print(
            "WARNING: headqa is deprecated. Please use headqa_es or headqa_en instead. See https://github.com/EleutherAI/lm-evaluation-harness/pull/240 for more info."
        )
@@ -20,7 +20,7 @@ from lm_eval.metrics import mean, perplexity
_CITATION="""
@misc{
author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
@@ -18,7 +18,7 @@ from lm_eval.tasks.lambada import LAMBADA
_CITATION="""
@misc{
author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
author={Paperno, Denis and Kruszewski, Germán and Lazaridou, Angeliki and Pham, Quan Ngoc and Bernardi, Raffaella and Pezzelle, Sandro and Baroni, Marco and Boleda, Gemma and Fernández, Raquel},
title={The LAMBADA dataset},
DOI={10.5281/zenodo.2630551},
publisher={Zenodo},
...
...
@@ -33,28 +33,32 @@ class MultilingualLAMBADA(lambada.LAMBADA):
    title={Natural Questions: a Benchmark for Question Answering Research},
    author={Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le and Slav Petrov},
    year={2019},
    journal={Transactions of the Association of Computational Linguistics}
}
"""


class NaturalQs(Task):
    VERSION = 0
    DATASET_PATH = "natural_questions"
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        # Cache training for faster few-shot.
        # Data is too large to fit in memory.
        if self._training_docs is None:
            self._training_docs = list(self.dataset["train"])
        return self._training_docs

    def validation_docs(self):
        return self.dataset["validation"]

    def fewshot_examples(self, k, rnd):
        # Data is too large to fit in memory. We just sample from the first bit.
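        # (Sketch completion; the diff is truncated here. Consistent with the
        # comment above, this samples k examples from a bounded prefix of the
        # train split rather than the whole thing; the cap of 100_000 is an
        # assumed constant, not necessarily the upstream value.)
        if self._training_docs is None:
            self._training_docs = list(self.dataset["train"].select(range(100_000)))
        return rnd.sample(self._training_docs, k)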