Commit c3f724cf authored by Leo Gao

Change glue and superglue prompts

parent 1050109b
@@ -3,7 +3,7 @@ from lm_eval.base import rf, mean, f1_score, matthews_corrcoef
 from scipy.stats import pearsonr, spearmanr
 from tqdm import auto as tqdm_lib
 from . common import HFTask, yesno
+from ..utils import general_detokenize
 
 # Single-Sentence Tasks
@@ -22,10 +22,10 @@ class CoLA(HFTask):
         return True
 
     def fewshot_description(self):
-        return "Does this sentence make sense?:\tTrue or False?"
+        return "Does this sentence make sense? (True or False)"
 
     def doc_to_text(self, doc):
-        return "Sentence: {}\nAnswer:".format(doc["sentence"])
+        return "{}\nQuestion: Does this sentence make sense?\nAnswer:".format(doc["sentence"])
 
     def doc_to_target(self, doc):
         return " {}".format({1: "True", 0: "False"}[doc["label"]])
@@ -71,8 +71,8 @@ class SST(HFTask):
         return "Indicate if each sentence is Positive or Negative."
 
     def doc_to_text(self, doc):
-        return "sentence:\t{}\t\nanswer:".format(
-            doc["sentence"],
+        return "{}\nQuestion: Is this sentence Positive or Negative?\nAnswer:".format(
+            general_detokenize(doc["sentence"]),
         )
 
     def doc_to_target(self, doc):
@@ -127,9 +127,9 @@ class MNLI(HFTask):
         return self.data["test_matched"]
 
     def doc_to_text(self, doc):
-        return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
+        return "{}\nQuestion: {} True, False or Neither?\nAnswer:".format(
             doc["premise"],
-            doc["hypothesis"],
+            doc["hypothesis"] + ('' if doc["hypothesis"].endswith('.') else '.'),
         )
 
     def doc_to_target(self, doc):
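The expression appended to the hypothesis just normalizes punctuation, so the rendered question always reads "<hypothesis>. True, False or Neither?". A quick illustration with made-up strings:

    for hyp in ["The cat is asleep", "The cat is asleep."]:
        print(hyp + ('' if hyp.endswith('.') else '.'))
    # Both iterations print: The cat is asleep.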
@@ -187,7 +187,7 @@ class QNLI(HFTask):
         return True
 
     def doc_to_text(self, doc):
-        return "question:\t{}\nresponse:\t{}\nDoes this answer the question, Yes or No?:".format(
+        return "{}\n{}\nQuestion: Does this response answer the question?\nAnswer:".format(
             doc["question"],
             doc["sentence"],
         )
@@ -235,7 +235,7 @@ class WNLI(HFTask):
         return True
 
     def doc_to_text(self, doc):
-        return "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
+        return "{}\nQuestion: {} True, False or Neither?\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
@@ -284,7 +284,7 @@ class RTE(HFTask):
         return True
 
     def doc_to_text(self, doc):
-        return "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
+        return "{}\nQuestion: {} True or False?\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
@@ -338,17 +338,17 @@ class MRPC(HFTask):
         return "Indicate if both sentences mean the same thing."
 
     def doc_to_text(self, doc):
-        return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
-            doc["sentence1"],
-            doc["sentence2"],
+        return "Sentence 1: {}\nSentence 2: {}\nQuestion: Do both sentences mean the same thing?\nAnswer:".format(
+            general_detokenize(doc["sentence1"]),
+            general_detokenize(doc["sentence2"]),
         )
 
     def doc_to_target(self, doc):
         return " {}".format(yesno(doc["label"]))
 
     def construct_requests(self, doc, ctx):
-        ll_yes, _ = rf.loglikelihood(ctx, " yes")
-        ll_no, _ = rf.loglikelihood(ctx, " no")
+        ll_yes, _ = rf.loglikelihood(ctx, " Yes")
+        ll_no, _ = rf.loglikelihood(ctx, " No")
         return ll_yes, ll_no
 
     def process_results(self, doc, results):
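Capitalizing the continuations matters because classification here compares the log-likelihood the model assigns to the literal strings " Yes" and " No" after the prompt (rf.loglikelihood above returns a pair whose first element is that score). A minimal sketch of the selection step, with score_fn as a hypothetical stand-in that returns a plain float:

    def classify_yes_no(score_fn, ctx):
        # Score both candidate continuations against the same context and
        # predict whichever one the model finds more likely.
        ll_yes = score_fn(ctx, " Yes")
        ll_no = score_fn(ctx, " No")
        return 1 if ll_yes > ll_no else 0  # 1 = same meaning, 0 = different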
@@ -390,7 +390,7 @@ class QQP(HFTask):
         return "Indicate if both questions ask the same thing."
 
     def doc_to_text(self, doc):
-        return "question 1:\t{}\nquestion 2:\t{}\nanswer:".format(
+        return "Question 1: {}\nQuestion 2: {}\nQuestion: Do both questions ask the same thing?\nAnswer:".format(
             doc["question1"],
             doc["question2"],
         )
@@ -443,7 +443,7 @@ class STSB(HFTask):
                "where 5 means identical and 0 means unrelated."
 
     def doc_to_text(self, doc):
-        return "sentence 1:\t{}\nsentence 2:\t{}\nanswer:".format(
+        return "sentence 1: {}\nsentence 2: {}\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
         )
......
@@ -28,7 +28,7 @@ class BoolQ(HFTask):
         return "Read the following passages and answer each question with a yes or a no."
 
     def doc_to_text(self, doc):
-        return f"{doc['passage']}\nquestion: {doc['question']}\nanswer:"
+        return f"{doc['passage']}\nQuestion: {doc['question']}\nAnswer:"
 
     def doc_to_target(self, doc):
         return " " + yesno(doc['label'])
@@ -80,7 +80,7 @@ class CommitmentBank(HFTask):
                "to the truth of the hypothesis. The three possible labels are true, false or neither."
 
     def doc_to_text(self, doc):
-        return "{}\nquestion: {} true, false or neither?\nanswer:".format(
+        return "{}\nQuestion: {}. True, False or Neither?\nAnswer:".format(
             doc["premise"],
             doc["hypothesis"],
         )
@@ -89,12 +89,12 @@ class CommitmentBank(HFTask):
         # True = entailment
         # False = contradiction
         # Neither = neutral
-        return " {}".format({0: "true", 1: "neither", 2: "false"}[doc["label"]])
+        return " {}".format({0: "True", 1: "Neither", 2: "False"}[doc["label"]])
 
     def construct_requests(self, doc, ctx):
-        ll_true, _ = rf.loglikelihood(ctx, ' true')
-        ll_neither, _ = rf.loglikelihood(ctx, ' neither')
-        ll_false, _ = rf.loglikelihood(ctx, ' false')
+        ll_true, _ = rf.loglikelihood(ctx, ' True')
+        ll_neither, _ = rf.loglikelihood(ctx, ' Neither')
+        ll_false, _ = rf.loglikelihood(ctx, ' False')
         return ll_true, ll_neither, ll_false
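The three scores are then resolved by taking the highest-scoring candidate. A sketch of that decision step, assuming the scores are plain floats (the index order mirrors the tuple returned above and the 0/1/2 label mapping):

    def cb_predict(ll_true, ll_neither, ll_false):
        # Argmax over candidates, in the same (True, Neither, False) order
        # used by construct_requests and doc_to_target.
        scores = [ll_true, ll_neither, ll_false]
        return ["True", "Neither", "False"][scores.index(max(scores))]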
@@ -214,15 +214,15 @@ class MultiRC(HFTask):
         return "READING COMPREHENSION ANSWER KEY"
 
     def doc_to_text(self, doc):
-        return f"{doc['paragraph']}\n\n{doc['question']}\n"
+        return f"{doc['paragraph']}\nQuestion: {doc['question']}\nAnswer:"
 
     def doc_to_target(self, doc):
         return self.format_answer(answer=doc["answer"], label=doc["label"])
 
     @staticmethod
     def format_answer(answer, label):
-        label_str = "True" if label else "False"
-        return f"[{label_str}] {answer}"
+        label_str = "Yes" if label else "No"
+        return f"{label_str}, {answer}"
 
     def construct_requests(self, doc, ctx):
         true_choice = self.format_answer(answer=doc["answer"], label=True)
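The new format turns each candidate answer into a natural "Yes, ..."/"No, ..." continuation rather than a bracketed tag; both variants are then scored. A standalone demonstration with a made-up answer string:

    def format_answer(answer, label):
        label_str = "Yes" if label else "No"
        return f"{label_str}, {answer}"

    print(format_answer(answer="the dog", label=True))   # Yes, the dog
    print(format_answer(answer="the dog", label=False))  # No, the dog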
@@ -364,8 +364,8 @@ class WordsInContext(HFTask):
         return ""
 
     def doc_to_text(self, doc):
-        return "{}\n{}\nQuestion: Is the word '{}' used in the same way in the" \
-               " two sentences above?\nanswer:".format(
+        return "Sentence 1: {}\nSentence 2: {}\nQuestion: Is the word '{}' used in the same way in the" \
+               " two sentences above?\nAnswer:".format(
             doc["sentence1"],
             doc["sentence2"],
             doc["sentence1"][doc["start1"]:doc["end1"]],
@@ -438,7 +438,7 @@ class SGWinogradSchemaChallenge(HFTask):
         # NOTE: HuggingFace span indices are word-based not character-based.
         pre = " ".join(raw_passage.split()[:doc["span2_index"]])
         post = raw_passage[len(pre) + len(doc["span2_text"]) + 1:]
-        passage = pre + " *{}*".format(doc['span2_text']) + post
+        passage = general_detokenize(pre + " *{}*".format(doc['span2_text']) + post)
         noun = doc["span1_text"]
         pronoun = doc["span2_text"]
         text = (
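The pre/post splice wraps the target pronoun in asterisks before the passage is detokenized. A sketch of that highlighting step on a made-up, pre-tokenized passage (span2_index counts words, per the NOTE above):

    raw_passage = "Mark told Pete many lies about himself ."
    span2_index, span2_text = 6, "himself"

    pre = " ".join(raw_passage.split()[:span2_index])
    post = raw_passage[len(pre) + len(span2_text) + 1:]
    print(pre + " *{}*".format(span2_text) + post)
    # Mark told Pete many lies about *himself* .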
......
 import os
+import re
 
 
 class ExitCodeError(Exception):
@@ -39,4 +40,13 @@ def chunks(iter, n):
             yield arr
             arr = []
 
-    if arr: yield arr
\ No newline at end of file
+    if arr: yield arr
+
+def general_detokenize(string):
+    string = string.replace(" n't", "n't")
+    string = string.replace(" )", ")")
+    string = string.replace("( ", "(")
+    string = string.replace("\" ", "\"")
+    string = string.replace(" \"", "\"")
+    string = re.sub(r" (['.,])", r"\1", string)
+    return string
\ No newline at end of file
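general_detokenize reverses common PTB-style tokenization artifacts (spaces before n't and punctuation, and inside brackets or quotes) so pre-tokenized dataset text reads naturally in prompts. For example:

    print(general_detokenize("He did n't like the movie ( at all ) ."))
    # He didn't like the movie (at all).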
@@ -16,7 +16,7 @@ def parse_args():
     parser.add_argument('--model_args', default="")
     parser.add_argument('--tasks', default="all_tasks")
     parser.add_argument('--provide_description', action="store_true")
-    parser.add_argument('--num_fewshot', type=int, default=1)
+    parser.add_argument('--num_fewshot', type=int, default=0)
     parser.add_argument('--seed', type=int, default=1234)
     parser.add_argument('--output_path', default=None)
     parser.add_argument('--limit', type=int, default=None)
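With the default flipped to 0, runs that omit --num_fewshot are now zero-shot; few-shot evaluation must be requested explicitly. The behavior is plain argparse:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_fewshot', type=int, default=0)

    print(parser.parse_args([]).num_fewshot)                      # 0 (zero-shot)
    print(parser.parse_args(['--num_fewshot', '5']).num_fewshot)  # 5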