Unverified commit d5f39bf8, authored by SuperCat, committed by GitHub
Browse files

Add new dataset MMLU-SR tasks (#2032)



* add mmlusr tasks

* renamed all tasks names in mmlusr

* edit format and readme

* added mmlu_sr

* mmlu_sr -> mmlusr

* update

---------
Co-authored-by: lintangsutawika <lintang@eleuther.ai>
parent cdd954f9
"dataset_name": "question_only_virology"
"description": "The following are multiple choice questions (with answers) about virology.\n\
\n"
"tag": "mmlusr_question_only_other_tasks"
"include": "_mmlusr_q_yml"
"task": "mmlusr_question_only_virology"
"task_alias": "virology"
"dataset_name": "question_only_world_religions"
"description": "The following are multiple choice questions (with answers) about world\
\ religions.\n\n"
"tag": "mmlusr_question_only_humanities_tasks"
"include": "_mmlusr_q_yml"
"task": "mmlusr_question_only_world_religions"
"task_alias": "world religions"
import datasets
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Map every row to a question, its four choices, and a letter answer.

    Args:
        dataset: Object exposing ``map``. Each row must provide ``question``,
            ``choice1`` .. ``choice4`` and ``answer``, where ``answer`` is
            anything ``int()`` accepts and denotes a 0-based choice index.

    Returns:
        Whatever ``dataset.map`` returns after applying the per-row helper.

    Raises:
        IndexError: From the per-row helper when an answer index is not in
            the range 0-3.
    """
    # Built once here instead of being re-created for every row.
    answer_letters = ("A", "B", "C", "D")

    def _helper(doc):
        # The stored answer is a numeric index; int() also accepts numeric
        # strings such as "2".
        answer_index = int(doc["answer"])

        # Plain sequence indexing would accept negative values (-1 -> "D")
        # and silently produce a wrong gold label, so the range is checked
        # explicitly. IndexError is kept because that is what an index that
        # is too large already raised.
        if not 0 <= answer_index < len(answer_letters):
            raise IndexError(
                f"answer index {answer_index} is outside the valid range "
                f"0-{len(answer_letters) - 1}"
            )

        return {
            # NOTE(review): the output key is "questions" (plural) while the
            # input field is "question"; kept unchanged because consumers
            # outside this view may rely on it.
            "questions": doc["question"],
            "choices": [
                doc["choice1"],
                doc["choice2"],
                doc["choice3"],
                doc["choice4"],
            ],
            "answer": answer_letters[answer_index],
        }

    return dataset.map(_helper)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment