Unverified Commit d5f39bf8 authored by SuperCat's avatar SuperCat Committed by GitHub
Browse files

Add new dataset MMLU-SR tasks (#2032)



* add mmlusr tasks

* renamed all task names in mmlusr

* edit format and readme

* added mmlu_sr

* mmlu_sr -> mmlusr

* update

---------
Co-authored-by: lintangsutawika <lintang@eleuther.ai>
parent cdd954f9
"dataset_name": "answer_only_world_religions"
"description": "The following are multiple choice questions (with answers) about world\
\ religions.\n\n"
"tag": "mmlusr_answer_only_humanities_tasks"
"include": "_mmlusr_a_yml"
"task": "mmlusr_answer_only_world_religions"
"task_alias": "world religions"
import datasets
def process_docs(dataset: "datasets.Dataset") -> "datasets.Dataset":
    """Convert raw MMLU-SR rows into the shape the task YAML expects.

    Each source row provides ``question``, ``choice1``..``choice4`` and a
    numeric ``answer`` index (0-3, possibly as a string).  The mapped row
    gains:

    * ``choices`` -- the four options as a list, in A/B/C/D order;
    * ``answer`` -- rewritten from the numeric index to the matching letter
      so ``doc_to_target`` lines up with ``doc_to_choice: ["A","B","C","D"]``.
    """
    letters = ("A", "B", "C", "D")

    def _helper(doc):
        # The answer column holds 0-3; int() also tolerates string digits.
        answer_index = int(doc["answer"])
        return {
            # Fix: this was previously written under the misspelled key
            # "questions"; the task template reads "question", which only
            # worked because Dataset.map preserves the source columns.
            "question": doc["question"],
            "choices": [doc["choice1"], doc["choice2"], doc["choice3"], doc["choice4"]],
            "answer": letters[answer_index],
        }

    return dataset.map(_helper)
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
import yaml
from tqdm import tqdm
# Module-level logger for progress/info messages emitted by the generator.
eval_logger = logging.getLogger("lm-eval")

# Every MMLU subject mapped to its top-level category.  The category feeds
# the generated "tag" field (e.g. mmlusr_question_and_answer_stem) so tasks
# can be grouped by STEM / humanities / social sciences / other.
SUBJECTS = {
    "abstract_algebra": "stem",
    "anatomy": "stem",
    "astronomy": "stem",
    "business_ethics": "other",
    "clinical_knowledge": "other",
    "college_biology": "stem",
    "college_chemistry": "stem",
    "college_computer_science": "stem",
    "college_mathematics": "stem",
    "college_medicine": "other",
    "college_physics": "stem",
    "computer_security": "stem",
    "conceptual_physics": "stem",
    "econometrics": "social_sciences",
    "electrical_engineering": "stem",
    "elementary_mathematics": "stem",
    "formal_logic": "humanities",
    "global_facts": "other",
    "high_school_biology": "stem",
    "high_school_chemistry": "stem",
    "high_school_computer_science": "stem",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_macroeconomics": "social_sciences",
    "high_school_mathematics": "stem",
    "high_school_microeconomics": "social_sciences",
    "high_school_physics": "stem",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_us_history": "humanities",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "human_sexuality": "social_sciences",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "machine_learning": "stem",
    "management": "other",
    "marketing": "other",
    "medical_genetics": "other",
    "miscellaneous": "other",
    "moral_disputes": "humanities",
    "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "prehistory": "humanities",
    "professional_accounting": "other",
    "professional_law": "humanities",
    "professional_medicine": "other",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "us_foreign_policy": "social_sciences",
    "virology": "other",
    "world_religions": "humanities",
}

# MMLU-SR variants to emit configs for; this generator instance handles the
# "question and answer" substitution split.
GROUPS = ["question_and_answer"]
def parse_args():
    """Build and parse the command-line options for the config generator."""
    p = argparse.ArgumentParser(
        description="Generate configuration YAML files for LM Evaluation Harness."
    )
    # Base YAML every generated per-subject config will `include`.
    p.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML configuration file.",
    )
    # NOTE(review): machine-specific absolute default -- confirm whether this
    # should default to the current directory instead.
    p.add_argument(
        "--save_dir",
        default="/data/local/cat/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_and_answer",
    )
    # Optional prefixes prepended to generated task / group names.
    p.add_argument("--task_prefix", default="")
    p.add_argument("--cot_prompt_path", default=None)
    p.add_argument("--group_prefix", default="")
    return p.parse_args()
if __name__ == "__main__":
    args = parse_args()

    # Only the file NAME of the base YAML is recorded: generated configs
    # reference it via a relative `include`, so they must live beside it.
    base_yaml_name = os.path.basename(args.base_yaml_path)
    with open(args.base_yaml_path, "r", encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    # Optional chain-of-thought prompts: a JSON mapping subject -> description
    # that overrides the default auto-generated description.
    if args.cot_prompt_path is not None:
        import json

        with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    # Robustness: make sure the output directory exists before writing.
    os.makedirs(args.save_dir, exist_ok=True)

    for group in GROUPS:
        for subject, category in tqdm(SUBJECTS.items()):
            if args.cot_prompt_path is not None:
                description = cot_file[subject]
            else:
                description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"

            yaml_dict = {
                "include": base_yaml_name,
                "tag": f"mmlusr_{args.group_prefix}{group}_{category}"
                if args.group_prefix
                else f"mmlusr_{group}_{category}",
                "task": f"mmlusr_{args.task_prefix}{group}_{subject}"
                if args.task_prefix
                else f"mmlusr_{group}_{subject}",
                "task_alias": subject.replace("_", " "),
                "description": description,
                "dataset_name": f"{group}_{subject}",
            }

            # One YAML per (group, subject).  default_style='"' double-quotes
            # every scalar so descriptions with embedded newlines round-trip.
            file_save_path = os.path.join(args.save_dir, f"{group}_{subject}.yaml")
            with open(file_save_path, "w", encoding="utf-8") as yaml_file:
                yaml.dump(yaml_dict, yaml_file, allow_unicode=True, default_style='"')
            eval_logger.info(f"Saved YAML for {group} {subject} to {file_save_path}")

    # Save group configuration if a group prefix was specified.
    if args.group_prefix:
        # Fix: this previously read `args.save_prefix_path`, an attribute
        # parse_args() never defines (AttributeError at runtime); the output
        # directory option is `args.save_dir`.
        file_save_path = os.path.join(args.save_dir, args.group_prefix + ".yaml")
        eval_logger.info(f"Saving benchmark config to {file_save_path}")
        # NOTE(review): this dumps the last per-task `yaml_dict` from the loop
        # above, which looks like a leftover -- confirm whether a dedicated
        # group config was intended here.
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(yaml_dict, yaml_file, indent=4, default_flow_style=False)
# Shared base config for the MMLU-SR "question and answer" tasks; the
# generated per-subject YAMLs `include` this file and only override
# task/tag/description/dataset_name.
dataset_path: NiniCat/MMLU-SR
test_split: test
fewshot_split: train
fewshot_config:
  sampler: first_n
output_type: multiple_choice
# process_docs (utils.py) adds `choices` and rewrites the numeric answer
# index into the matching letter.
process_docs: !function utils.process_docs
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
# Top-level MMLU-SR (question & answer) group: rolls up the four category
# subgroups, each of which aggregates its subject tasks by accuracy,
# weighted by subset size.
group: mmlusr
group_alias: MMLU-SR (Question & Answer)
task:
  - group: mmlusr_qa_stem
    group_alias: STEM (Question & Answer)
    task:
      - mmlusr_question_and_answer_stem_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 1
  - group: mmlusr_qa_other
    group_alias: Other (Question & Answer)
    task:
      - mmlusr_question_and_answer_other_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 1
  - group: mmlusr_qa_social_sciences
    group_alias: Social Sciences (Question & Answer)
    task:
      - mmlusr_question_and_answer_social_sciences_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 1
  - group: mmlusr_qa_humanities
    group_alias: Humanities (Question & Answer)
    task:
      - mmlusr_question_and_answer_humanities_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
    metadata:
      version: 1
aggregate_metric_list:
  - metric: acc
    weight_by_size: True
metadata:
  version: 1
"dataset_name": "question_and_answer_abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract\
\ algebra.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_abstract_algebra"
"task_alias": "abstract algebra"
"dataset_name": "question_and_answer_anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy.\n\
\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_anatomy"
"task_alias": "anatomy"
"dataset_name": "question_and_answer_astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy.\n\
\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_astronomy"
"task_alias": "astronomy"
"dataset_name": "question_and_answer_business_ethics"
"description": "The following are multiple choice questions (with answers) about business\
\ ethics.\n\n"
"tag": "mmlusr_question_and_answer_other_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_business_ethics"
"task_alias": "business ethics"
"dataset_name": "question_and_answer_clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical\
\ knowledge.\n\n"
"tag": "mmlusr_question_and_answer_other_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_clinical_knowledge"
"task_alias": "clinical knowledge"
"dataset_name": "question_and_answer_college_biology"
"description": "The following are multiple choice questions (with answers) about college\
\ biology.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_college_biology"
"task_alias": "college biology"
"dataset_name": "question_and_answer_college_chemistry"
"description": "The following are multiple choice questions (with answers) about college\
\ chemistry.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_college_chemistry"
"task_alias": "college chemistry"
"dataset_name": "question_and_answer_college_computer_science"
"description": "The following are multiple choice questions (with answers) about college\
\ computer science.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_college_computer_science"
"task_alias": "college computer science"
"dataset_name": "question_and_answer_college_mathematics"
"description": "The following are multiple choice questions (with answers) about college\
\ mathematics.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_college_mathematics"
"task_alias": "college mathematics"
"dataset_name": "question_and_answer_college_medicine"
"description": "The following are multiple choice questions (with answers) about college\
\ medicine.\n\n"
"tag": "mmlusr_question_and_answer_other_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_college_medicine"
"task_alias": "college medicine"
"dataset_name": "question_and_answer_college_physics"
"description": "The following are multiple choice questions (with answers) about college\
\ physics.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_college_physics"
"task_alias": "college physics"
"dataset_name": "question_and_answer_computer_security"
"description": "The following are multiple choice questions (with answers) about computer\
\ security.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_computer_security"
"task_alias": "computer security"
"dataset_name": "question_and_answer_conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual\
\ physics.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_conceptual_physics"
"task_alias": "conceptual physics"
"dataset_name": "question_and_answer_econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics.\n\
\n"
"tag": "mmlusr_question_and_answer_social_sciences_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_econometrics"
"task_alias": "econometrics"
"dataset_name": "question_and_answer_electrical_engineering"
"description": "The following are multiple choice questions (with answers) about electrical\
\ engineering.\n\n"
"tag": "mmlusr_question_and_answer_stem_tasks"
"include": "_mmlusr_qna_yml"
"task": "mmlusr_question_and_answer_electrical_engineering"
"task_alias": "electrical engineering"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment