Unverified commit 44f03593, authored by Leo Gao, committed by GitHub

Merge pull request #176 from EleutherAI/per_char_agg

Do per character loss aggregation for multiple choice tasks
parents fd26ef16 1ebf41d3
@@ -226,19 +226,24 @@ class MultipleChoiceTask(Task):
         gold = doc["gold"]
 
         acc = 1. if np.argmax(results) == gold else 0.
+        completion_len = np.array([float(len(i)) for i in doc["choices"]])
+        acc_norm = 1. if np.argmax(results / completion_len) == gold else 0.
 
         return {
-            "acc": acc
+            "acc": acc,
+            "acc_norm": acc_norm,
         }
 
     def higher_is_better(self):
         return {
-            "acc": True
+            "acc": True,
+            "acc_norm": True,
         }
 
     def aggregation(self):
         return {
-            "acc": mean
+            "acc": mean,
+            "acc_norm": mean,
         }
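For context, here is a minimal, self-contained sketch of what the new acc_norm metric computes. It is not part of the diff; the choices, log-likelihood values, and gold index below are invented for illustration.

# Illustrative sketch of the length-normalized accuracy added in the hunk above.
import numpy as np

choices = ["put the bread in a sealed bag", "eat it"]  # hypothetical answer options
results = np.array([-12.0, -7.0])   # summed log-likelihood of each choice given the context
gold = 0                            # index of the correct choice

# Raw argmax favors the shorter (wrong) answer here, because longer completions
# tend to accumulate more negative log-likelihood simply by having more tokens.
acc = 1. if np.argmax(results) == gold else 0.                         # -> 0.0

# Dividing each score by the choice's length in characters removes that bias.
completion_len = np.array([float(len(c)) for c in choices])
acc_norm = 1. if np.argmax(results / completion_len) == gold else 0.   # -> 1.0

print(acc, acc_norm)

Because this scoring now lives in MultipleChoiceTask.process_results, individual tasks no longer need their own process_results, aggregation, or higher_is_better implementations, as the PiQA diff below shows.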
 import numpy as np
-from lm_eval.base import rf
+from lm_eval.base import MultipleChoiceTask, rf
 from ..metrics import mean
 from . common import HFTask
 
-class PiQA(HFTask):
+class PiQA(HFTask, MultipleChoiceTask):
     DATASET_PATH = "piqa"
     DATASET_NAME = None
@@ -21,29 +21,29 @@ class PiQA(HFTask):
         # TODO: figure out fewshot description
         return ""
 
-    def doc_to_text(self, doc):
-        return "Question: "+doc["goal"] + "\nAnswer:"
-
-    def doc_to_target(self, doc):
-        solutions = [doc["sol1"], doc["sol2"]]
-        return " " + solutions[doc["label"]]
-
-    def construct_requests(self, doc, ctx):
-        ll_1, _ = rf.loglikelihood(ctx, " " + doc['sol1'])
-        ll_2, _ = rf.loglikelihood(ctx, " " + doc['sol2'])
-        return ll_1, ll_2
-
-    def process_results(self, doc, results):
-        return {
-            'acc': np.argmax(results) == doc["label"]
-        }
-
-    def aggregation(self):
-        return {
-            'acc': mean
-        }
-
-    def higher_is_better(self):
-        return {
-            'acc': True
-        }
+    def _convert_standard(self, doc):
+        out_doc = {
+            "goal": doc["goal"],
+            "choices": [doc["sol1"], doc["sol2"]],
+            "gold": doc["label"],
+        }
+        return out_doc
+
+    def _load_docs(self, docs):
+        for record in docs:
+            yield self._convert_standard(record)
+
+    def training_docs(self):
+        docs = super().training_docs()
+        return self._load_docs(docs)
+
+    def validation_docs(self):
+        docs = super().validation_docs()
+        return self._load_docs(docs)
+
+    def test_docs(self):
+        docs = super().test_docs()
+        return self._load_docs(docs)
+
+    def doc_to_text(self, doc):
+        return "Question: " + doc["goal"] + "\nAnswer:"
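For reference, a minimal sketch of the record conversion that the new _convert_standard helper performs. It is not taken from the diff; the raw record contents below are made up.

# Illustrative only: turn a raw PiQA-style record into the standardized dict
# that MultipleChoiceTask consumes.
raw_doc = {
    "goal": "How do I keep bread fresh for longer?",
    "sol1": "Store it in a sealed bag.",
    "sol2": "Leave it uncovered on the counter.",
    "label": 0,
}

standard_doc = {
    "goal": raw_doc["goal"],
    "choices": [raw_doc["sol1"], raw_doc["sol2"]],  # candidate completions
    "gold": raw_doc["label"],                       # index of the correct choice
}

# The shared base class then issues one loglikelihood request per entry in
# "choices" and scores acc / acc_norm against "gold", as in the first hunk.
print(standard_doc)

Standardizing documents into the goal/choices/gold shape is what lets the shared base class own request construction and scoring, so per-task code shrinks to data conversion plus prompt formatting.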