"docs/vscode:/vscode.git/clone" did not exist on "ec034c15023ca0412a91aeddd8aad164e155b695"
Unverified commit cc238121, authored by Leo Gao, committed by GitHub
Browse files

Merge pull request #243 from bigscience-workshop/thomas/fix_multirc

Fix multirc
parents 170ae096 73d0ae5e
...@@ -52,13 +52,14 @@ def acc_all(items): ...@@ -52,13 +52,14 @@ def acc_all(items):
docs = list(zip(*items))[1] docs = list(zip(*items))[1]
for doc, pred in zip(docs, preds): for doc, pred in zip(docs, preds):
paragraph_id = doc["idx"]["paragraph"]
question_id = doc["idx"]["question"] question_id = doc["idx"]["question"]
if question_id not in question_scoring_dict: if (paragraph_id, question_id) not in question_scoring_dict:
question_scoring_dict[question_id] = [] question_scoring_dict[(paragraph_id, question_id)] = []
gold_label = doc["label"] == 1 gold_label = doc["label"] == 1
question_scoring_dict[question_id].append(gold_label == pred)
question_scoring_dict[(paragraph_id, question_id)].append(gold_label == pred)
acc = np.mean([int(all(x)) for x in question_scoring_dict.values()]) acc = np.mean([int(all(x)) for x in question_scoring_dict.values()])
return acc return acc
......
...@@ -188,7 +188,7 @@ class Copa(HFTask): ...@@ -188,7 +188,7 @@ class Copa(HFTask):
class MultiRC(HFTask): class MultiRC(HFTask):
VERSION = 0 VERSION = 1
DATASET_PATH = "super_glue" DATASET_PATH = "super_glue"
DATASET_NAME = "multirc" DATASET_NAME = "multirc"
...@@ -210,7 +210,7 @@ class MultiRC(HFTask): ...@@ -210,7 +210,7 @@ class MultiRC(HFTask):
@staticmethod @staticmethod
def format_answer(answer, label): def format_answer(answer, label):
label_str = "yes" if label else "no" label_str = "yes" if label else "no"
return f"{label_str}, {answer}" return f"{answer}\nIs the answer correct? {label_str}"
def construct_requests(self, doc, ctx): def construct_requests(self, doc, ctx):
true_choice = self.format_answer(answer=doc["answer"], label=True) true_choice = self.format_answer(answer=doc["answer"], label=True)
...@@ -222,7 +222,8 @@ class MultiRC(HFTask): ...@@ -222,7 +222,8 @@ class MultiRC(HFTask):
return ll_true_choice, ll_false_choice return ll_true_choice, ll_false_choice
def process_results(self, doc, results): def process_results(self, doc, results):
pred = np.argmax(results) ll_true_choice, ll_false_choice = results
pred = ll_true_choice > ll_false_choice
return { return {
"acc": (pred, doc) "acc": (pred, doc)
} }
......
0e793bd6f637a70a04c6f2cda080188fc037961b2f909095fe63f7bdbc4a90c6
\ No newline at end of file
{"results": {"multirc": {"acc": 0.046169989506820566, "acc_stderr": 0.006801377886208738}}, "versions": {"multirc": 1}}
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment