gaoqiong / lm-evaluation-harness · Commits

Commit 9b14235d
Authored Feb 08, 2021 by Leo Gao
Parent: 96157fc7

More task changes
Showing 8 changed files with 16 additions and 15 deletions:

    lm_eval/tasks/anli.py        +1 -1
    lm_eval/tasks/arithmetic.py  +1 -1
    lm_eval/tasks/glue.py        +5 -4
    lm_eval/tasks/piqa.py        +2 -2
    lm_eval/tasks/pubmedqa.py    +1 -1
    lm_eval/tasks/race.py        +4 -4
    lm_eval/tasks/sciq.py        +1 -1
    lm_eval/tasks/webqs.py       +1 -1
lm_eval/tasks/anli.py

@@ -39,7 +39,7 @@ class ANLIBase(HFTask):
         # of the prompt (yes, repeating it!). also, " True, False, or Neither?" is directly
         # appended onto the question, with no "Answer:" or even a newline. Do we *really*
         # want to do it exactly as OA did?
-        return doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\nTrue, False, or Neither?'
+        return doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + ' True, False, or Neither?\nAnswer:'
 
     def doc_to_target(self, doc):
         # True = entailment
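Not part of the diff: a toy render of the new ANLI prompt, assuming a made-up doc with 'premise' and 'hypothesis' fields, to make the format change concrete.

    # Toy reproduction of the updated ANLI prompt format (assumed toy doc).
    doc = {"premise": "A cat sat on the mat.", "hypothesis": "An animal is on the mat."}
    prompt = doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + ' True, False, or Neither?\nAnswer:'
    print(prompt)
    # A cat sat on the mat.
    # Question: An animal is on the mat. True, False, or Neither?
    # Answer: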
lm_eval/tasks/arithmetic.py

@@ -56,7 +56,7 @@ class Arithmetic(Task):
         return doc.completion
 
     def load_doc(self, doc_json):
-        return ArithmeticDoc(context=doc_json['context'], completion=doc_json['completion'])
+        return ArithmeticDoc(context=doc_json['context'].strip(), completion=doc_json['completion'].strip())
 
     def construct_requests(self, doc, ctx):
         ll, is_prediction = rf.loglikelihood(ctx, doc.completion)
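For illustration only (toy doc_json assumed): the added .strip() calls keep stray whitespace in the JSON fields out of the strings that loglikelihood ultimately scores.

    # Toy doc_json with the kind of stray whitespace the new .strip() removes.
    doc_json = {"context": "Question: What is 23 + 54? Answer:", "completion": " 77\n"}
    print(repr(doc_json['completion']))          # ' 77\n'
    print(repr(doc_json['completion'].strip()))  # '77'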
lm_eval/tasks/glue.py

@@ -22,17 +22,18 @@ class CoLA(HFTask):
         return True
 
     def fewshot_description(self):
-        return "Does this sentence make sense? (True or False)"
+        # TODO
+        return ""
 
     def doc_to_text(self, doc):
         return "{}\nQuestion: Does this sentence make sense?\nAnswer:".format(doc["sentence"])
 
     def doc_to_target(self, doc):
-        return " {}".format({1: "True", 0: "False"}[doc["label"]])
+        return " {}".format({1: "yes", 0: "no"}[doc["label"]])
 
     def construct_requests(self, doc, ctx):
-        ll_true, _ = rf.loglikelihood(ctx, " True")
-        ll_false, _ = rf.loglikelihood(ctx, " False")
+        ll_true, _ = rf.loglikelihood(ctx, " yes")
+        ll_false, _ = rf.loglikelihood(ctx, " no")
         return ll_true, ll_false
 
     def process_results(self, doc, results):
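A sketch of how these two requests are presumably consumed downstream (toy numbers; the actual reduction lives in process_results, which this hunk does not show): the prediction is whichever continuation scores higher.

    # Sketch: CoLA accuracy from the two returned loglikelihoods (toy values).
    ll_true, ll_false = -1.2, -2.7   # log P(" yes" | ctx), log P(" no" | ctx)
    gold = 1                         # 1 = acceptable sentence
    pred = 1 if ll_true > ll_false else 0
    print({"acc": float(pred == gold)})  # {'acc': 1.0}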
lm_eval/tasks/piqa.py

@@ -21,11 +21,11 @@ class PiQA(HFTask):
         return ""
 
     def doc_to_text(self, doc):
-        return doc["goal"] + "\n"
+        return "Question: " + doc["goal"] + "\nAnswer:"
 
     def doc_to_target(self, doc):
         solutions = [doc["sol1"], doc["sol2"]]
-        return solutions[doc["label"]]
+        return " " + solutions[doc["label"]]
 
     def construct_requests(self, doc, ctx):
         ll_1, _ = rf.loglikelihood(ctx, doc['sol1'])
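A toy render of the new PIQA format (made-up doc): the goal now gets a Question:/Answer: frame, and the target gains a leading space so it joins cleanly after "Answer:".

    # Toy PIQA doc under the new text/target pair (assumed fields).
    doc = {"goal": "How do I open a jar?", "sol1": "Twist the lid.", "sol2": "Freeze it.", "label": 0}
    text = "Question: " + doc["goal"] + "\nAnswer:"
    target = " " + [doc["sol1"], doc["sol2"]][doc["label"]]
    print(text + target)
    # Question: How do I open a jar?
    # Answer: Twist the lid.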
lm_eval/tasks/pubmedqa.py

@@ -30,7 +30,7 @@ class Pubmed_QA(HFTask):
 
     def doc_to_text(self, doc):
         ctxs = "\n".join(doc["context"]["contexts"])
-        return "abstract: {}\nquestion: {}\nanswer:".format(
+        return "Abstract: {}\nQuestion: {}\nAnswer:".format(
             ctxs,
             doc["question"],
             doc["final_decision"]
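A toy render of the capitalized PubMedQA prompt (made-up doc). Note that str.format silently ignores the extra doc["final_decision"] argument, since the template has only two placeholders.

    # Toy PubMedQA doc showing the capitalized prompt (assumed minimal fields).
    doc = {"context": {"contexts": ["Aspirin was tested.", "Outcomes improved."]},
           "question": "Does aspirin help?"}
    ctxs = "\n".join(doc["context"]["contexts"])
    print("Abstract: {}\nQuestion: {}\nAnswer:".format(ctxs, doc["question"]))
    # Abstract: Aspirin was tested.
    # Outcomes improved.
    # Question: Does aspirin help?
    # Answer: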
lm_eval/tasks/race.py

@@ -82,10 +82,10 @@ class RACE(HFTask):
     def doc_to_text(self, doc):
         text = 'Article: ' + doc['article'] + '\n\n'
         for problem in doc['problems'][:-1]:
-            question = 'Q: ' + problem['question'] + '\n\n'
-            answer = 'A: ' + self.get_answer_option(problem) + '\n\n'
-            text += question + answer
-        text += 'Q: ' + self.last_problem(doc)['question'] + '\n\n' + 'A:'
+            assert problem['question'][-6:] == '  _  .'
+            text += problem['question'][-5:] + self.get_answer_option(problem) + '\n'
+
+        text += self.last_problem(doc)['question']
         return text
 
     def doc_to_target(self, doc):
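The new RACE cloze handling leans on a six-character question suffix that renders ambiguously here (consecutive spaces collapse in the diff view). Assuming the suffix is '  _  .' with doubled spaces, a toy illustration of what each slice selects; whether the commit intends the tail ([-5:], as rendered) or the stem ([:-5]) is hard to tell from the page.

    # Toy RACE cloze question ending in the assumed 6-char suffix '  _  .'
    # (two spaces on each side of the underscore), which the new assert checks.
    q = "The author mainly wants to  _  ."
    print(repr(q[-6:]))  # '  _  .'  <- suffix the assert matches
    print(repr(q[-5:]))  # ' _  .'   <- slice as rendered in the diff
    print(repr(q[:-5]))  # 'The author mainly wants to '  <- stem alternative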
lm_eval/tasks/sciq.py

@@ -62,4 +62,4 @@ class SciQ(MultipleChoiceTask):
         return self.load_docs("data/sciq/SciQ dataset-2 3/test.json")
 
     def doc_to_text(self, doc):
-        return "{}\n{}".format(doc["source"], doc["query"])
\ No newline at end of file
+        return "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"]).strip()
\ No newline at end of file
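A toy example (assumed doc; the motivation is a guess) of what the trailing .strip() buys: a doc with an empty "source" no longer yields a prompt with a leading newline.

    # Toy SciQ doc with an empty "source": .strip() drops the leading newline
    # that the "{}\n..." template would otherwise produce.
    doc = {"source": "", "query": "What orbits the Earth?"}
    prompt = "{}\nQuestion: {}\nAnswer:".format(doc["source"], doc["query"]).strip()
    print(repr(prompt))  # 'Question: What orbits the Earth?\nAnswer:'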
lm_eval/tasks/webqs.py

@@ -19,7 +19,7 @@ class WebQs(HFTask):
         return ""
 
     def doc_to_text(self, doc):
-        return "Q: " + doc['question'] + '\nA:'
+        return "Question: " + doc['question'] + '\nAnswer:'
 
     def doc_to_target(self, doc):
         # this picks one answer to be the "correct" one, despite sometimes
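A toy render of the spelled-out WebQs prompt (made-up doc):

    # Toy WebQuestions doc under the new Question:/Answer: prompt.
    doc = {"question": "what is the capital of france?"}
    print("Question: " + doc['question'] + '\nAnswer:')
    # Question: what is the capital of france?
    # Answer: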