Commit 83b1c564 authored by Baber
Browse files

add `llama_gpqa`

parent 4288b53e
# Task configuration for `llama_gpqa`: a zero-shot, generative multiple-choice
# task over the `gpqa_main` configuration of `Idavidrein/gpqa`.
task: llama_gpqa
dataset_path: Idavidrein/gpqa
dataset_name: gpqa_main
output_type: generate_until
# The dataset split named `train` is used as the evaluation (test) split.
test_split: train
# Prompt template; `choice1`..`choice4` are produced by `utils.process_docs`.
doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{Question}}\nA. {{choice1}}\nB. {{choice2}}\nC. {{choice3}}\nD. {{choice4}}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D."
process_docs: !function utils.process_docs
# `answer` is the letter (A-D) written by `utils.process_docs`.
doc_to_target: answer
# NOTE(review): presumably prepended to the model's response so that the
# generation starts right at the answer letter -- confirm against the harness.
gen_prefix: "The best answer is"
generation_kwargs:
  # Stop at the first newline; greedy decoding.
  until:
    - "\n"
  max_gen_toks: 96
  do_sample: false
  temperature: 0
filter_list:
  - name: exact_match
    filter:
      # Extract a single capital letter from the generation.
      - function: multi_choice_regex
        group_select: 0
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: ([A-Z])
      - function: remove_whitespace
      - function: take_first
metric_list:
  - metric: exact_match
    ignore_punctuation: true
    aggregation: mean
    higher_is_better: true
num_fewshot: 0
metadata:
  version: 1.0
dataset_kwargs:
  trust_remote_code: true
import random
import re
import datasets
def preprocess(text):
    """Normalize a raw answer string before it is shown as a choice.

    Steps, in order:
      1. Strip leading/trailing whitespace.
      2. Replace the literal marker `` [title]`` with ``". "``.
      3. Remove every remaining ``[...]`` bracketed span (non-greedy, single
         line -- ``.`` does not match newlines).
      4. Collapse double spaces (typically left behind by step 3) into one.

    Args:
        text: The raw string, or ``None``.

    Returns:
        The cleaned string. ``None`` input yields a single space ``" "``.
    """
    if text is None:
        return " "
    text = text.strip()
    text = text.replace(" [title]", ". ")
    # Raw string: "\[" in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    text = re.sub(r"\[.*?\]", "", text)
    # Collapse the double space left where a bracketed span was removed.
    text = text.replace("  ", " ")
    return text
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Add shuffled multiple-choice fields and the gold letter to each doc.

    For every document, the three incorrect answers and the correct answer
    are cleaned with ``preprocess``, shuffled with the module-level ``random``
    generator, and written out as ``choice1``..``choice4`` / ``choices``.
    ``answer`` is the letter (``"A"``..``"D"``) of the correct option's
    position after shuffling.

    Args:
        dataset: Dataset whose rows contain the keys ``"Incorrect Answer 1"``,
            ``"Incorrect Answer 2"``, ``"Incorrect Answer 3"`` and
            ``"Correct Answer"``.

    Returns:
        The result of ``dataset.map`` applied with the per-document transform.
    """

    def _process_doc(doc):
        # Tag each option so the correct one can be found by identity after
        # shuffling. Looking it up by text would pick the wrong position if a
        # distractor normalizes to the same string as the correct answer.
        tagged = [
            (preprocess(doc["Incorrect Answer 1"]), False),
            (preprocess(doc["Incorrect Answer 2"]), False),
            (preprocess(doc["Incorrect Answer 3"]), False),
            (preprocess(doc["Correct Answer"]), True),
        ]
        random.shuffle(tagged)

        choices = [option for option, _ in tagged]
        correct_answer_index = next(
            position
            for position, (_, is_correct) in enumerate(tagged)
            if is_correct
        )

        return {
            "choice1": choices[0],
            "choice2": choices[1],
            "choice3": choices[2],
            "choice4": choices[3],
            "choices": list(choices),
            # chr(65) == "A", so indices 0..3 map to "A".."D".
            "answer": chr(65 + correct_answer_index),
        }

    return dataset.map(_process_doc)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment