Unverified Commit 4462e415 authored by Leo Gao, committed by GitHub
Browse files

Make piqa inherit MultipleChoiceTask

parent 2522c27f
import numpy as np import numpy as np
from lm_eval.base import rf from lm_eval.base import rf
from ..metrics import mean from ..metrics import mean
from . common import HFTask from . common import MultipleChoiceTask, HFTask
class PiQA(HFTask): class PiQA(HFTask, MultipleChoiceTask):
DATASET_PATH = "piqa" DATASET_PATH = "piqa"
DATASET_NAME = None DATASET_NAME = None
...@@ -21,32 +21,29 @@ class PiQA(HFTask): ...@@ -21,32 +21,29 @@ class PiQA(HFTask):
# TODO: figure out fewshot description # TODO: figure out fewshot description
return "" return ""
def doc_to_text(self, doc): def _convert_standard(self, doc):
return "Question: "+doc["goal"] + "\nAnswer:" out_doc = {
"goal": doc["goal"],
"choices": [doc["sol1"], doc["sol2"]],
"gold": doc["label"],
}
return out_doc
def doc_to_target(self, doc): def _load_docs(self, docs):
solutions = [doc["sol1"], doc["sol2"]] for record in docs:
return " " + solutions[doc["label"]] yield self._convert_standard(record)
def construct_requests(self, doc, ctx): def training_docs(self):
ll_1, _ = rf.loglikelihood(ctx, " " + doc['sol1']) docs = super().training_docs()
ll_2, _ = rf.loglikelihood(ctx, " " + doc['sol2']) return self._load_docs(docs)
return ll_1, ll_2
def process_results(self, doc, results): def validation_docs(self):
completion_len = np.array([float(len(doc["sol1"])), float(len(doc["sol2"]))]) docs = super().validation_docs()
return self._load_docs(docs)
return { def test_docs(self):
'acc': np.argmax(results) == doc["label"], docs = super().test_docs()
'acc_norm': np.argmax(results / completion_len) == doc["label"] return self._load_docs(docs)
}
def aggregation(self): def doc_to_text(self, doc):
return { return "Question: " + doc["goal"] + "\nAnswer:"
'acc': mean
}
def higher_is_better(self):
return {
'acc': True
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment