Unverified Commit 0e4139b8 authored by Leo Gao, committed by GitHub

Merge pull request #86 from jon-tow/doc-to-text-refactor

Move `doc_to_text` target code into `doc_to_target`
parents e5d0229f d77241eb
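
This PR applies one pattern across every task: the old `doc_to_text(self, doc, include_target=True)` built the prompt and, when the flag was set, appended the gold answer; the new interface splits that into `doc_to_text(doc)`, which returns only the prompt, and `doc_to_target(doc)`, which returns only the target continuation. A minimal sketch of the pattern, using the CoLA hunk below as the example (the `build_fewshot_prompt` helper is hypothetical, shown only to illustrate how a caller might consume the split interface; it is not part of this diff):

```python
# Before: one method, with the gold answer optionally appended via a flag.
class CoLABefore:
    def doc_to_text(self, doc, include_target=True):
        text = "Sentence: {}\nAnswer:".format(doc["sentence"])
        if include_target:
            text += " {}".format({1: "True", 0: "False"}[doc["label"]])
        return text

# After: prompt and target are separate methods, so callers can score a
# candidate continuation against the prompt without string surgery.
class CoLAAfter:
    def doc_to_text(self, doc):
        return "Sentence: {}\nAnswer:".format(doc["sentence"])

    def doc_to_target(self, doc):
        # Leading space: doc_to_text(doc) + doc_to_target(doc) should
        # reproduce the old include_target=True output.
        return " {}".format({1: "True", 0: "False"}[doc["label"]])

# Hypothetical caller (not part of this diff): a few-shot prompt is the
# concatenation of text + target for each shot, then text alone for the
# evaluated example.
def build_fewshot_prompt(task, shots, eval_doc):
    prompt = ""
    for shot in shots:
        prompt += task.doc_to_text(shot) + task.doc_to_target(shot) + "\n\n"
    return prompt + task.doc_to_text(eval_doc)
```

One convention visible throughout the hunks: most `doc_to_target` implementations return a leading space, or the prompt ends in a colon or trailing space, so that concatenating the two methods' outputs reproduces the old single-method behavior.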
@@ -34,16 +34,16 @@ class ANLIBase(HFTask):
         # TODO: figure out description
         return ""
 
-    def doc_to_text(self, doc, include_target=True):
+    def doc_to_text(self, doc):
         print(doc)
         # OA does this a bit weirdly: they prepend "anli 1: anli 1: " to the beginning
         # of the prompt (yes, repeating it!). also, " True, False, or Neither?" is directly
         # appended onto the question, with no "Answer:" or even a newline. Do we *really*
         # want to do it exactly as OA did?
-        q = doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\n'
-        a = "True, False, or Neither?" + ((" " + ["True", "Neither", "False"][doc['label']]) if include_target else '')
-        return q + a
+        return doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\nTrue, False, or Neither?'
+
+    def doc_to_target(self, doc):
+        return " " + ["True", "Neither", "False"][doc['label']]
 
     # TODO: Implement evaluation code

@@ -19,10 +19,11 @@ class ARCEasy(HFTask):
         # TODO: figure out description
         return ""
 
-    def doc_to_text(self, doc, include_target=True):
-        q = "Question: " + doc['question'] + '\n'
-        a = "Answer:" + ((" " + doc['choices']['text'][doc['choices']['label'].index(doc['answerKey'])]) if include_target else "")
-        return q + a
+    def doc_to_text(self, doc):
+        return "Question: " + doc['question'] + '\nAnswer:'
+
+    def doc_to_target(self, doc):
+        return " " + doc['choices']['text'][doc['choices']['label'].index(doc['answerKey'])]
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: implement

@@ -39,11 +39,11 @@ class CoLA(HFTask):
     def fewshot_description(self):
         return "Does this sentence make sense?:\tTrue or False?"
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "Sentence: {}\nAnswer:".format(doc["sentence"])
-        if include_target:
-            text += " {}".format({1: "True", 0: "False"}[doc["label"]])
-        return text
+    def doc_to_text(self, doc):
+        return "Sentence: {}\nAnswer:".format(doc["sentence"])
+
+    def doc_to_target(self, doc):
+        return " {}".format({1: "True", 0: "False"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
@@ -92,17 +92,17 @@ class MNLI(HFTask):
         if self.has_test_docs():
             return self.data["test_matched"]
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
             doc["premise"],
             doc["hypothesis"],
         )
-        if include_target:
-            # True = entailment
-            # False = contradiction
-            # Neither = neutral
-            text += " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        # True = entailment
+        # False = contradiction
+        # Neither = neutral
+        return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -154,14 +154,14 @@ class MRPC(HFTask):
     def fewshot_description(self):
         return "Indicate if both sentences mean the same thing."
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
-        if include_target:
-            text += " {}".format(yesno(doc["label"]))
-        return text
+
+    def doc_to_target(self, doc):
+        return " {}".format(yesno(doc["label"]))
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -194,16 +194,16 @@ class RTE(HFTask):
     def has_test_docs(self):
         return True
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
-        if include_target:
-            # 0 = entailment
-            # 1 = not_entailment
-            text += " {}".format({0: "True", 1: "False"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        # 0 = entailment
+        # 1 = not_entailment
+        return " {}".format({0: "True", 1: "False"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -236,16 +236,16 @@ class QNLI(HFTask):
     def has_test_docs(self):
         return True
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "question:\t{}\nresponse:\t{}\nDoes this answer the question, Yes or No?:".format(
+    def doc_to_text(self, doc):
+        return "question:\t{}\nresponse:\t{}\nDoes this answer the question, Yes or No?:".format(
             doc["question"],
             doc["sentence"],
         )
-        if include_target:
-            # True = entailment
-            # False = not entailment
-            text += " {}".format({0: "Yes", 1: "No"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        # True = entailment
+        # False = not entailment
+        return " {}".format({0: "Yes", 1: "No"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -281,14 +281,14 @@ class QQP(HFTask):
     def fewshot_description(self):
         return "Indicate if both questions ask the same thing."
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "question 1:\t{}\nquestion 2:\t{}\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "question 1:\t{}\nquestion 2:\t{}\nanswer:".format(
             doc["question1"],
             doc["question2"],
         )
-        if include_target:
-            text += " {}".format(yesno(doc["label"]))
-        return text
+
+    def doc_to_target(self, doc):
+        return " {}".format(yesno(doc["label"]))
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -325,14 +325,14 @@ class STSB(HFTask):
         return "Indicate if both sentences mean the same thing from a scale of 0-5, " \
                "where 5 means identical and 0 means unrelated."
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
-        if include_target:
-            text += " {}".format(doc["label"])
-        return text
+
+    def doc_to_target(self, doc):
+        return " {}".format(doc["label"])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -386,13 +386,13 @@ class SST(HFTask):
     def fewshot_description(self):
         return "Indicate if each sentence is Positive or Negative."
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "sentence:\t{}\t\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "sentence:\t{}\t\nanswer:".format(
             doc["sentence"],
         )
-        if include_target:
-            text += " {}".format({1: "Positive", 0: "Negative"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        return " {}".format({1: "Positive", 0: "Negative"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -425,17 +425,17 @@ class WNLI(HFTask):
     def has_test_docs(self):
         return True
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
            doc["sentence1"],
            doc["sentence2"],
        )
-        if include_target:
-            # True = entailment
-            # False = contradiction
-            # Neither = neutral
-            text += " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        # True = entailment
+        # False = contradiction
+        # Neither = neutral
+        return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework

@@ -34,22 +34,22 @@ class HellaSwag(HFTask):
     def fewshot_description(self):
         return "Label for the relevant action: Sentences describing the context, with an incomplete sentence trailing\nanswer that plausibly completes the situation."
 
-    def doc_to_text(self, doc, include_target=True):
-        text = doc['activity_label'] + ': ' + doc['ctx'] + '\n'
-        if include_target:
-            letter_answer = doc['label']
-            if letter_answer == '0':
-                index = 0
-            elif letter_answer == '1':
-                index = 1
-            elif letter_answer == '2':
-                index = 2
-            elif letter_answer == '3':
-                index = 3
-            else:
-                raise ValueError("HellaSwag from HF datasets contained an invalid answer key")
-            text += doc['endings'][index]
-        return text
+    def doc_to_text(self, doc):
+        return doc['activity_label'] + ': ' + doc['ctx'] + '\n'
+
+    def doc_to_target(self, doc):
+        letter_answer = doc['label']
+        if letter_answer == '0':
+            index = 0
+        elif letter_answer == '1':
+            index = 1
+        elif letter_answer == '2':
+            index = 2
+        elif letter_answer == '3':
+            index = 3
+        else:
+            raise ValueError("HellaSwag from HF datasets contained an invalid answer key")
+        return doc['endings'][index]
 
     # TODO: Implement evaluation code

@@ -32,23 +32,19 @@ class NaturalQs(HFTask):
         return random.sample(self._traindocs, k)
 
-    def doc_to_text(self, doc, include_target=True):
-        question = doc['question']['text']
-        text = 'Q: ' + question + '\n\n' + 'A: '
-        if include_target:
-            # There's a short answer and a long answer. Based on the paper, I'm using the long answer.
-            short_answer = doc['annotations']['short_answers'][0]['text']
-            long_answer_start = doc['annotations']['long_answer'][0]['start_token']
-            long_answer_end = doc['annotations']['long_answer'][0]['end_token']
-            long_answer_span = doc['document']['tokens']['token'][long_answer_start:long_answer_end]
-            long_answer_is_html = doc['document']['tokens']['is_html'][long_answer_start:long_answer_end]
-            long_answer_chars = [tok for (tok, is_html) in zip(long_answer_span, long_answer_is_html) if not is_html]
-            long_answer = " ".join(long_answer_chars)
-            text += long_answer # Replace with short_answer[0] for short answer
-        return text
+    def doc_to_text(self, doc):
+        return 'Q: ' + doc['question']['text'] + '\n\n' + 'A: '
+
+    def doc_to_target(self, doc):
+        # There's a short answer and a long answer. Based on the paper, I'm using the long answer.
+        short_answer = doc['annotations']['short_answers'][0]['text']
+        long_answer_start = doc['annotations']['long_answer'][0]['start_token']
+        long_answer_end = doc['annotations']['long_answer'][0]['end_token']
+        long_answer_span = doc['document']['tokens']['token'][long_answer_start:long_answer_end]
+        long_answer_is_html = doc['document']['tokens']['is_html'][long_answer_start:long_answer_end]
+        long_answer_chars = [tok for (tok, is_html) in zip(long_answer_span, long_answer_is_html) if not is_html]
+        long_answer = " ".join(long_answer_chars)
+        return long_answer  # Replace with short_answer[0] for short answer
 
     # TODO: Implement evaluation code

@@ -36,22 +36,22 @@ class OpenBookQA(HFTask):
     def fewshot_description(self):
         return "Text of the question prompt\nText of the answer completion"
 
-    def doc_to_text(self, doc, include_target=True):
-        text = doc['question_stem'] + '\n'
-        if include_target:
-            letter_answer = doc['answerKey']
-            if letter_answer == 'A':
-                index = 0
-            elif letter_answer == 'B':
-                index = 1
-            elif letter_answer == 'C':
-                index = 2
-            elif letter_answer == 'D':
-                index = 3
-            else:
-                raise ValueError("OpenBookQA from HF datasets contained an invalid answer key")
-            text += doc['choices']['text'][index] + '.'
-        return text
+    def doc_to_text(self, doc):
+        return doc['question_stem'] + '\n'
+
+    def doc_to_target(self, doc):
+        letter_answer = doc['answerKey']
+        if letter_answer == 'A':
+            index = 0
+        elif letter_answer == 'B':
+            index = 1
+        elif letter_answer == 'C':
+            index = 2
+        elif letter_answer == 'D':
+            index = 3
+        else:
+            raise ValueError("OpenBookQA from HF datasets contained an invalid answer key")
+        return doc['choices']['text'][index] + '.'
 
     # TODO: Implement evaluation code

@@ -47,13 +47,14 @@ class PiQA(Dataset):
     def fewshot_description(self):
         pass
 
-    def doc_to_text(self, doc, include_target=True):
-        if include_target:
-            rightanswer = int(doc[1][0])+1
-            return ''.join([doc[0]['goal'],' ',doc[0]['sol'+str(rightanswer)]])
+    def doc_to_text(self, doc):
         #TODO: check if oa uses newline
         return doc['goal'] + ' '
 
+    def doc_to_target(self, doc):
+        rightanswer = int(doc[1][0]) + 1
+        return ''.join([doc[0]['goal'],' ',doc[0]['sol'+str(rightanswer)]])
+
     # TODO: Implement evaluation code
     # ***IMPORTANT***: this evaluation function needs to be written for the new framework.

@@ -55,11 +55,11 @@ class QuAC(Dataset):
             docs.append(doc)
         return docs
 
-    def doc_to_text(self, doc, include_target=True):
-        text = 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
-        if include_target:
-            text += doc['answer']
-        return text
+    def doc_to_text(self, doc):
+        return 'TITLE: ' + doc['title'] + '\n' + 'PARAGRAPH: ' + doc['paragraph'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
+
+    def doc_to_target(self, doc):
+        return doc['answer']
 
     # TODO: Implement evaluation code

@@ -31,16 +31,16 @@ class SQuAD(HFTask):
         # TODO: redo description
         return "Title: The_Title_of_It\n\nBackground: A text passage as background to answer the question with.\n\nQ: Question about the passage.\n\nA: Answer."
 
-    def doc_to_text(self, doc, include_target=True):
-        text = 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
-        if include_target:
-            answer_list = doc['answers']['text']
-            if len(answer_list) > 0:
-                answer = answer_list[0]
-            else:
-                answer = 'unanswerable'
-            text += answer
-        return text
+    def doc_to_text(self, doc):
+        return 'Title: ' + doc['title'] + '\n\n' + 'Background: ' + doc['context'] + '\n\n' + 'Q: ' + doc['question'] + '\n\n' + 'A: '
+
+    def doc_to_target(self, doc):
+        answer_list = doc['answers']['text']
+        if len(answer_list) > 0:
+            answer = answer_list[0]
+        else:
+            answer = 'unanswerable'
+        return answer
 
     # TODO: Implement evaluation code

@@ -41,11 +41,11 @@ class StoryCloze(Dataset):
     def fewshot_description(self):
         pass
 
-    def doc_to_text(self, doc, include_target=True):
-        if include_target:
-            return ' '.join([*doc[1:5],doc[int(doc[-1])-4]])
-        else:
-            return ' '.join([*doc[1:5]])
+    def doc_to_text(self, doc):
+        return ' '.join([*doc[1:5]])
+
+    def doc_to_target(self, doc):
+        return " " + doc[int(doc[-1]) - 4]
 
     # TODO: Implement evaluation code

@@ -69,17 +69,17 @@ class CommitmentBank(HFTask):
     def has_test_docs(self):
         return True
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "{}\nquestion:\t{}\ttrue, false or neither?\nanswer:".format(
+    def doc_to_text(self, doc):
+        return "{}\nquestion:\t{}\ttrue, false or neither?\nanswer:".format(
             doc["premise"],
             doc["hypothesis"],
         )
-        if include_target:
-            # True = entailment
-            # False = contradiction
-            # Neither = neutral
-            text += " {}".format({0: "true", 1: "neither", 2: "false"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        # True = entailment
+        # False = contradiction
+        # Neither = neutral
+        return " {}".format({0: "true", 1: "neither", 2: "false"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -117,18 +117,18 @@ class Copa(HFTask):
     def has_test_docs(self):
         return True
 
-    def doc_to_text(self, doc, include_target=True):
+    def doc_to_text(self, doc):
         # Drop the period
         connector = {
             "cause": "because",
             "effect": "therefore",
         }[doc["question"]]
-        text = doc["premise"].strip()[:-1] + f" {connector} "
-        if include_target:
-            correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
-            # Connect the sentences
-            text += self.convert_choice(correct_choice)
-        return text
+        return doc["premise"].strip()[:-1] + f" {connector} "
+
+    def doc_to_target(self, doc):
+        correct_choice = doc["choice1"] if doc["label"] == 0 else doc["choice2"]
+        # Connect the sentences
+        return self.convert_choice(correct_choice)
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -170,10 +170,11 @@ class MultiRC(HFTask):
     def fewshot_description(self):
         return "READING COMPREHENSION ANSWER KEY"
 
-    def doc_to_text(self, doc, include_target=True):
-        return f"{doc['paragraph']}\n\n{doc['question']}\n" \
-            + (self.format_answer(answer=doc["answer"], label=doc["label"])
-               if include_target else "")
+    def doc_to_text(self, doc):
+        return f"{doc['paragraph']}\n\n{doc['question']}\n"
+
+    def doc_to_target(self, doc):
+        return self.format_answer(answer=doc["answer"], label=doc["label"])
 
     @staticmethod
     def format_answer(answer, label):
@@ -229,16 +230,16 @@ class WordsInContext(HFTask):
     def has_test_docs(self):
         return True
 
-    def doc_to_text(self, doc, include_target=True):
-        text = "{}\n{}\nquestion\tIs the word '{}' used in the same way in the" \
+    def doc_to_text(self, doc):
+        return "{}\n{}\nquestion\tIs the word '{}' used in the same way in the" \
                " two sentences above?\nanswer:".format(
             doc["sentence1"],
             doc["sentence2"],
             doc["sentence1"][doc["start1"]:doc["end1"]],
         )
-        if include_target:
-            text += " {}".format({0: "no", 1: "yes"}[doc["label"]])
-        return text
+
+    def doc_to_target(self, doc):
+        return " {}".format({0: "no", 1: "yes"}[doc["label"]])
 
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -288,7 +289,7 @@ class SGWinogradSchemaChallenge(HFTask):
                "For each passage, you must identify which noun the pronoun marked in *bold*" \
                " refers to.\n====="
 
-    def doc_to_text(self, doc, include_target=True):
+    def doc_to_text(self, doc):
         raw_passage = doc["text"]
         passage = (
             raw_passage[:doc["span2_index"]]
@@ -301,10 +302,11 @@ class SGWinogradSchemaChallenge(HFTask):
             + f"Question: In the passage above, what does the pronoun \"*{pronoun}*\" refer to?\n"
             + "Answer:"
         )
-        if include_target:
-            text += " {}".format(doc["span1_text"])
         return text
 
+    def doc_to_target(self, doc):
+        return " {}".format(doc["span1_text"])
+
     def evaluate(self, docs, lm, provide_description, num_fewshot):
         # TODO: Implement evaluation code using new framework
@@ -336,16 +338,12 @@ class RTE(HFTask):
         #TODO: implement
         pass
 
-    def doc_to_text(self, doc, include_target=True):
-        if include_target:
-            if doc['label'] == 0:
-                answer = 'True'
-            else:
-                answer = 'False'
-            return ''.join([doc['premise'], '\nquestion: ',doc['hypothesis'], ' True or False?\nanswer: ', answer])
-        else:
-            return ''.join([doc['premise'], '\nquestion: ',doc['hypothesis'], ' True or False?\nanswer: '])
+    def doc_to_text(self, doc):
+        return ''.join([doc['premise'], '\nquestion: ',doc['hypothesis'], ' True or False?\nanswer: '])
+
+    def doc_to_target(self, doc):
+        return 'True' if doc['label'] == 0 else 'False'
 
     # TODO: Implement evaluation code
     # ***IMPORTANT***: this evaluation function needs to be written for the new framework.

@@ -39,11 +39,11 @@ class TriviaQA(Dataset):
     def fewshot_description(self):
         pass
 
-    def doc_to_text(self, doc, include_target=True):
-        if include_target:
-            return ''.join(['Q: ', doc['Question'], '\n\n','A: ', doc['Answer']['Aliases'][0]])
-        else:
-            return ''.join(['Q: ', doc['Question'], '\n\n','A: '])
+    def doc_to_text(self, doc):
+        return ''.join(['Q: ', doc['Question'], '\n\n','A: '])
+
+    def doc_to_target(self, doc):
+        return doc['Answer']['Aliases'][0]
 
     # TODO: Implement evaluation code

@@ -19,15 +19,15 @@ class WebQs(HFTask):
         # TODO: figure out description
         return ""
 
-    def doc_to_text(self, doc, include_target=True):
+    def doc_to_text(self, doc):
         print(doc)
-        q = "Q: " + doc['question'] + '\n'
+        return "Q: " + doc['question'] + '\nA:'
 
+    def doc_to_target(self, doc):
         # this picks one answer to be the "correct" one, despite sometimes
         # multiple correct answers being possible.
         # TODO: make sure we're actually handling multi-answer correctly
-        a = "A:" + ((" " + doc['answers'][0]) if include_target else '')
-        return q + a
+        return " " + doc['answers'][0]
 
     # TODO: Implement evaluation code

@@ -34,18 +34,19 @@ class Winogrande(HFTask):
     def fewshot_description(self):
         return "Winograd schema sentence including a either a ___ blank with a missing word, making the pronoun ambiguous, or the same with the word filled in."
 
-    def doc_to_text(self, doc, include_target=True):
+    def doc_to_text(self, doc):
+        return doc['sentence']
+
+    def doc_to_target(self, doc):
         text = doc['sentence']
-        if include_target:
-            answer_n = doc['answer']
-            if answer_n == '1':
-                answer = doc['option1']
-            elif answer_n == '2':
-                answer = doc['option2']
-            else:
-                raise ValueError("Winogrande from HF datasets contained an invalid answer key")
-            text = text.replace("_", answer)
-        return text
+        answer_n = doc['answer']
+        if answer_n == '1':
+            answer = doc['option1']
+        elif answer_n == '2':
+            answer = doc['option2']
+        else:
+            raise ValueError("Winogrande from HF datasets contained an invalid answer key")
+        return text.replace("_", answer)
 
     # TODO: Implement evaluation code