Unverified Commit 01108aca authored by Uanu's avatar Uanu Committed by GitHub
Browse files

Add new GPQA tasks (CoT and generative variants) (#1482)



* Add new tasks of GPQA

* Add README

* Remove unused functions

* Remove unused functions

* Linters

* Add flexible match

* update

* Remove duplicate function

* Linter

* update

* Update lm_eval/filters/extraction.py
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>

* register multi_choice_regex

* Update

* run precommit

---------
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
Co-authored-by: haileyschoelkopf <hailey@eleuther.ai>
parent 8a875e9a
import random
import re
import datasets
def preprocess(text):
    """Normalize a raw answer string before it is used as a choice.

    Steps, in order:
      1. ``None`` is mapped to a single space so callers always get a string.
      2. Leading/trailing whitespace is stripped.
      3. The literal marker " [title]" is turned into a sentence break (". ").
      4. Any remaining square-bracketed span (non-greedy) is removed.
      5. Double spaces left behind by steps 3-4 are collapsed to one space.

    Args:
        text: The raw string, or ``None``.

    Returns:
        The cleaned string (``" "`` when ``text`` is ``None``).
    """
    if text is None:
        return " "
    text = text.strip()
    text = text.replace(" [title]", ". ")
    text = re.sub(r"\[.*?\]", "", text)
    # Removing a bracketed tag (or inserting ". ") leaves two adjacent
    # spaces; collapse them. Replacing " " with " " would be a no-op.
    # A single pass is used, so runs of 3+ spaces are shortened, not
    # fully collapsed.
    text = text.replace("  ", " ")
    return text
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Map every document to a shuffled four-way multiple-choice record.

    For each document the three incorrect answers and the correct answer are
    cleaned with ``preprocess``, shuffled in place with ``random.shuffle``,
    and exposed as ``choice1``..``choice4`` plus a ``choices`` list. The
    ``answer`` field holds the letter of the correct choice in parentheses,
    e.g. ``"(C)"``.

    Args:
        dataset: The dataset whose rows provide the "Incorrect Answer 1..3"
            and "Correct Answer" fields.

    Returns:
        The result of ``dataset.map`` applied with the per-row transform.
    """

    def _process_doc(doc):
        # Clean the four options in a fixed order before shuffling.
        shuffled = [
            preprocess(doc[field])
            for field in (
                "Incorrect Answer 1",
                "Incorrect Answer 2",
                "Incorrect Answer 3",
                "Correct Answer",
            )
        ]
        random.shuffle(shuffled)

        # Locate the (first) position whose text equals the cleaned correct
        # answer; that position determines the answer letter.
        target = preprocess(doc["Correct Answer"])
        position = shuffled.index(target)

        record = dict(zip(("choice1", "choice2", "choice3", "choice4"), shuffled))
        record["choices"] = list(shuffled)
        # 65 is ord("A"): position 0 -> "(A)", 1 -> "(B)", ...
        record["answer"] = "(" + chr(65 + position) + ")"
        return record

    return dataset.map(_process_doc)
......@@ -3,7 +3,7 @@ from tqdm import tqdm
def main() -> None:
subset = ["extended", "diamond", "experts", "main"]
subset = ["extended", "diamond", "main"]
for task in tqdm(subset):
file_name = f"gpqa_{task}_n_shot.yaml"
......
......@@ -3,7 +3,7 @@ from tqdm import tqdm
def main() -> None:
subset = ["extended", "diamond", "experts", "main"]
subset = ["extended", "diamond", "main"]
setting = "zeroshot"
for task in tqdm(subset):
file_name = f"gpqa_{task}_{setting}.yaml"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment