Unverified commit 170ae096, authored by Leo Gao and committed by GitHub
Browse files

Merge pull request #226 from jon-tow/evaluator-description-option

Replace `fewshot_description` API with a `description_dict` based interface
parents 8728710c 02a4def2
......@@ -13,6 +13,3 @@ class LAMBADA_cloze(LAMBADA):
def doc_to_target(self, doc):
return " " + doc['text'].rsplit(' ', 1)[1]
def fewshot_description(self):
return "Fill in blank:\n"
......@@ -80,9 +80,5 @@ class LogiQA(MultipleChoiceTask):
def test_docs(self):
return self._load_docs(self.DATASET_PATH / "Test.txt")
def fewshot_description(self):
# TODO: figure out actual description
return ""
def doc_to_text(self, doc):
return doc["query"]
......@@ -29,9 +29,5 @@ class MathQA(HFTask, MultipleChoiceTask):
}
return out_doc
def fewshot_description(self):
# TODO: figure out description
return ""
def doc_to_text(self, doc):
return doc["query"]
......@@ -39,9 +39,6 @@ class MCTACO(HFTask):
def has_test_docs(self):
return True
def fewshot_description(self):
return "Determine whether the candidate answer is plausible (\"yes\") or not (\"no\")"
def doc_to_text(self, doc):
return f"{doc['sentence']}\nQuestion: {doc['question']}\n"\
f"Answer: {doc['answer']}\nPlausible:"
......
......@@ -70,10 +70,6 @@ class MuTualBase(Task):
def test_docs(self):
return NotImplemented
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def doc_to_text(self, doc):
return self.detokenize(doc["article"])
......
......@@ -21,10 +21,6 @@ class NaturalQs(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out description
return ""
def training_docs(self):
# Cache training for faster few-shot.
# Data is too large to fit in memory.
......
......@@ -25,9 +25,5 @@ class OpenBookQA(HFTask, MultipleChoiceTask):
}
return out_doc
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def doc_to_text(self, doc):
return doc["query"]
......@@ -18,10 +18,6 @@ class PiQA(HFTask, MultipleChoiceTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def _convert_standard(self, doc):
out_doc = {
"goal": doc["goal"],
......
......@@ -36,13 +36,14 @@ class PROST(HFTask, MultipleChoiceTask):
def has_test_docs(self):
return True
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def fewshot_context(self, doc, num_fewshot, provide_description, rnd):
def fewshot_context(self, doc, num_fewshot, provide_description=None, rnd=None, description=None):
assert num_fewshot == 0, 'PROST is designed to probe models in a zero-shot fashion only.'
return super().fewshot_context(doc, num_fewshot, provide_description, rnd)
return super().fewshot_context(
doc=doc,
num_fewshot=num_fewshot,
rnd=rnd,
description=description
)
def _convert_standard(self, doc):
out_doc = {
......
......@@ -23,11 +23,6 @@ class Pubmed_QA(HFTask):
# HF is labelled as train but its really just for testing
return self.data["train"]
def fewshot_description(self):
# Average ctx length in labelled dataset is 238.9
# 2 few-shot examples pushes it beyond context window
return ""
def doc_to_text(self, doc):
ctxs = "\n".join(doc["context"]["contexts"])
return "Abstract: {}\nQuestion: {}\nAnswer:".format(
......
......@@ -67,9 +67,6 @@ class QA4MRE(MultipleChoiceTask):
out_doc['source'] = src
yield out_doc
def fewshot_description(self):
return ""
def test_docs(self):
return self.load_docs(f"data/qa4mre/QA4MRE-{self.YEAR}-EN_GS.xml")
......
......@@ -51,11 +51,6 @@ class QuAC(Task):
def test_docs(self):
raise NotImplementedError("QuAC has no test docs.")
def fewshot_description(self):
# TODO: figure out fewshot description
desc = "TITLE: Title of the context passage - subtitle of the passage\nPARAGRAPH: Passage describing the relevant information for answering questions.\n\nQ: Text of a question.\n\nA: Answer to the question, based on the passage. If it cannot be answered based on the passage, write CANNOTANSWER"
return desc
def load_doc(self, myjson):
docs = []
for item in myjson:
......
......@@ -65,10 +65,6 @@ class RACE(HFTask):
def test_docs(self):
return self._collate_data("test")
def fewshot_description(self):
# TODO: figure out description
return ""
@classmethod
def get_answer_option(cls, problem):
answer = cls.letter_to_num[problem['answer']]
......
......@@ -61,10 +61,5 @@ class SATAnalogies(MultipleChoiceTask):
}
yield doc
def fewshot_description(self):
# TODO: figure out actual description
return ""
def doc_to_text(self, doc):
return "{} is to {} as".format(*doc['query'])
......@@ -50,9 +50,6 @@ class SciQ(MultipleChoiceTask):
for record in docs:
yield self._convert_standard(record)
def fewshot_description(self):
return ""
def training_docs(self):
return self.load_docs("data/sciq/SciQ dataset-2 3/train.json")
......@@ -63,4 +60,4 @@ class SciQ(MultipleChoiceTask):
return self.load_docs("data/sciq/SciQ dataset-2 3/test.json")
def doc_to_text(self, doc):
return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"]).strip()
\ No newline at end of file
return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"]).strip()
......@@ -41,10 +41,6 @@ class SQuAD2(HFTask):
def validation_docs(self):
return self.data["validation"]
def fewshot_description(self):
# TODO: figure out description
return ""
def doc_to_text(self, doc):
return 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Question: ' + doc['question'] + '\n\n' + 'Answer:'
......
......@@ -27,18 +27,12 @@ class StoryCloze(Task):
filereader = csv.reader(file)
return list(filereader)
def validation_docs(self):
return self.load_doc("data/storycloze/cloze_test_val__winter2018-cloze_test_ALL_val - 1 - 1.csv")
def test_docs(self):
return self.load_doc("data/storycloze/cloze_test_test__winter2018-cloze_test_ALL_test - 1.csv")
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def doc_to_text(self, doc):
return ' '.join([*doc[1:5]])
......
......@@ -26,10 +26,6 @@ class BoolQ(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out actual description
return "Read the following passages and answer each question with a yes or a no."
def doc_to_text(self, doc):
return f"{doc['passage']}\nQuestion: {doc['question']}?\nAnswer:"
......@@ -78,11 +74,6 @@ class CommitmentBank(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out actual description
return "Given a premise and a hypothesis, classify whether the author of the premise is committed" \
"to the truth of the hypothesis. The three possible labels are true, false or neither."
def doc_to_text(self, doc):
return "{}\nQuestion: {}. True, False or Neither?\nAnswer:".format(
doc["premise"],
......@@ -150,11 +141,6 @@ class Copa(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out actual description
return "Given a premise and one alternative with a causal relation to the premise and another without," \
"choose the more plausible alternative"
def doc_to_text(self, doc):
# Drop the period
connector = {
......@@ -215,10 +201,6 @@ class MultiRC(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out actual description
return "READING COMPREHENSION ANSWER KEY"
def doc_to_text(self, doc):
return f"{doc['paragraph']}\nQuestion: {doc['question']}\nAnswer:"
......@@ -270,10 +252,6 @@ class ReCoRD(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out actual description
return ""
def training_docs(self):
# In ReCoRD, each doc manifests multiple "examples" in the context of few shot example packing.
# Each doc consists of multiple answer candidates, each of which is scored yes/no.
......@@ -363,10 +341,6 @@ class WordsInContext(HFTask):
def has_test_docs(self):
return False
def fewshot_description(self):
# TODO: figure out actual description
return ""
def doc_to_text(self, doc):
return "Sentence 1: {}\nSentence 2: {}\nQuestion: Is the word '{}' used in the same way in the" \
" two sentences above?\nAnswer:".format(
......@@ -432,12 +406,6 @@ class SGWinogradSchemaChallenge(HFTask):
]
return self._training_docs
def fewshot_description(self):
return "Final Exam with Answer Key\n" \
"Instructions: Please carefully read the following passages. " \
"For each passage, you must identify which noun the pronoun marked in *bold*" \
" refers to.\n====="
def doc_to_text(self, doc):
raw_passage = doc["text"]
# NOTE: HuggingFace span indices are word-based not character-based.
......
......@@ -166,12 +166,6 @@ class GeneralTranslationTask(Task):
"ter": False,
}
def fewshot_description(self):
language_codes = self.sacrebleu_language_pair.split("-")
src_lang = code_to_language(language_codes[0])
tar_lang = code_to_language(language_codes[1])
return f"Translate these {src_lang} phrases to {tar_lang}."
def __str__(self):
language_codes = self.sacrebleu_language_pair.split("-")
src_lang = code_to_language(language_codes[0])
......
......@@ -36,10 +36,6 @@ class TriviaQA(Task):
def test_docs(self):
raise NotImplementedError()
def fewshot_description(self):
# TODO: figure out fewshot description
return ""
def doc_to_text(self, doc):
return f"Question: {doc['Question']}\nAnswer:"
......@@ -56,7 +52,6 @@ class TriviaQA(Task):
ret.append(alias)
return ret
def construct_requests(self, doc, ctx):
ret = []
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment