Commit 5be2bb10 authored by lintangsutawika
Browse files

update files

parent bfbda3b3
"""
Take in a base YAML and write one task YAML per subject that includes it,
plus a group YAML that lists the subject categories.
"""
import argparse
import logging
import os
import yaml
from tqdm import tqdm
# Logger shared by this script; it is looked up under the "lm-eval" name.
eval_logger = logging.getLogger("lm-eval")

# Maps each subject (used as the dataset subset name) to the category it is
# grouped under. Insertion order matters: the script iterates this mapping
# and collects categories in first-seen order.
SUBJECTS = {
    "business": "other",
    "law": "humanities",
    "psychology": "social_sciences",
    "biology": "stem",
    "chemistry": "stem",
    "history": "humanities",
    "other": "other",
    "health": "other",
    "economics": "social_sciences",
    "math": "stem",
    "physics": "stem",
    "computer_science": "stem",
    "philosophy": "humanities",
    "engineering": "stem",
}
def parse_args(argv=None):
    """Parse the command-line options of the config generator.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so the
            zero-argument call keeps working as before.

    Returns:
        An ``argparse.Namespace`` with ``base_yaml_path``,
        ``save_prefix_path``, ``cot_prompt_path``, ``task_prefix`` and
        ``group_prefix``.
    """
    parser = argparse.ArgumentParser(
        description="Generate per-subject task YAMLs and a group YAML."
    )
    parser.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML; its file name is written to the "
        "'include' key of every generated config.",
    )
    parser.add_argument(
        "--save_prefix_path",
        default="mmlu_pro",
        help="Path prefix for generated files; per-subject files are "
        "written to <prefix>_<subject>.yaml.",
    )
    parser.add_argument(
        "--cot_prompt_path",
        default=None,
        help="Optional JSON file mapping each subject to its description.",
    )
    parser.add_argument(
        "--task_prefix",
        default="",
        help="Optional infix placed after 'mmlu_pro_' in task and group names.",
    )
    parser.add_argument(
        "--group_prefix",
        default="",
        help="If set, the group config is written to <group_prefix>.yaml "
        "instead of <save_prefix_path>.yaml.",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    args = parse_args()

    # Only the file name of the base YAML is needed: each generated config
    # refers to it through its "include" key.
    base_yaml_name = os.path.basename(args.base_yaml_path)
    # The parsed content is not used below; loading it still fails early
    # when the file is missing or is not valid YAML.
    # NOTE(review): full_load is kept from the original; the base YAML is
    # presumably a trusted, repository-local file.
    with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    use_cot = args.cot_prompt_path is not None
    if use_cot:
        import json

        with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    # Common stem of every task/group name: "mmlu_pro" optionally followed
    # by the user-supplied task prefix.
    if args.task_prefix == "":
        name_stem = "mmlu_pro"
    else:
        name_stem = f"mmlu_pro_{args.task_prefix}"

    # Categories in first-seen order; they become the group config's tasks.
    ALL_CATEGORIES = []
    for subject, category in tqdm(SUBJECTS.items()):
        if category not in ALL_CATEGORIES:
            ALL_CATEGORIES.append(category)

        if use_cot:
            description = cot_file[subject]
        else:
            readable_subject = subject.replace("_", " ")
            description = f"The following are multiple choice questions (with answers) about {readable_subject}.\n\n"

        yaml_dict = {
            "include": base_yaml_name,
            "group": f"{name_stem}_{category}",
            "group_alias": category.replace("_", " "),
            "task": f"{name_stem}_{subject}",
            "task_alias": subject.replace("_", " "),
            "dataset_name": subject,
            "description": description,
        }

        file_save_path = f"{args.save_prefix_path}_{subject}.yaml"
        eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            # Force double-quoted scalars for keys and values.
            yaml.dump(
                yaml_dict,
                yaml_file,
                allow_unicode=True,
                default_style='"',
            )

    mmlu_pro_subcategories = [
        f"{name_stem}_{category}" for category in ALL_CATEGORIES
    ]

    # An explicit group prefix overrides the save prefix for the group file.
    if args.group_prefix == "":
        file_save_path = args.save_prefix_path + ".yaml"
    else:
        file_save_path = args.group_prefix + ".yaml"

    eval_logger.info(f"Saving benchmark config to {file_save_path}")
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            {
                "group": name_stem,
                "task": mmlu_pro_subcategories,
            },
            yaml_file,
            indent=4,
            default_flow_style=False,
        )
# Shared base task config: dataset location, splits, prompt template and
# metric. Presumably this is the file the per-subject configs pull in via
# their "include" key - confirm against the file name in the repository.
dataset_path: sjyuxyz/MMLU-Pro-with-subset
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: multiple_choice
# Question followed by the ten lettered options A-J and an "Answer:" cue.
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nE. {{choices[4]}}\nF. {{choices[5]}}\nG. {{choices[6]}}\nH. {{choices[7]}}\nI. {{choices[8]}}\nJ. {{choices[9]}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D", "E","F","G","H","I","J"]
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
# Top-level benchmark group; its tasks are the four category groups that the
# per-subject configs assign themselves to through their "group" key.
group: mmlu_pro
task:
- mmlu_pro_stem
- mmlu_pro_other
- mmlu_pro_social_sciences
- mmlu_pro_humanities
# Per-subject task config: "biology" subset, grouped under mmlu_pro_stem.
"dataset_name": "biology"
"description": "The following are multiple choice questions (with answers) about biology.\n\
\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_biology"
"task_alias": "biology"
# Per-subject task config: "business" subset, grouped under mmlu_pro_other.
"dataset_name": "business"
"description": "The following are multiple choice questions (with answers) about business.\n\
\n"
"group": "mmlu_pro_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_pro_business"
"task_alias": "business"
# Per-subject task config: "math" subset, grouped under mmlu_pro_stem.
# NOTE(review): a second document with identical "math" content appears
# further down, and no document for the "chemistry" entry of SUBJECTS is
# visible in this view. Given the otherwise alphabetical order, this may be
# the chemistry config carrying math contents - verify against the file name.
"dataset_name": "math"
"description": "The following are multiple choice questions (with answers) about math.\n\
\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_math"
"task_alias": "math"
# Per-subject task config: "computer_science" subset, grouped under
# mmlu_pro_stem.
# NOTE(review): the generator script replaces underscores with spaces in the
# description and in task_alias, but this document keeps "computer_science"
# in both - it may predate that change; regenerate or confirm.
"dataset_name": "computer_science"
"description": "The following are multiple choice questions (with answers) about computer_science.\n\
\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_computer_science"
"task_alias": "computer_science"
# Per-subject task config: "economics" subset, grouped under
# mmlu_pro_social_sciences.
# NOTE(review): the generator script writes group_alias with underscores
# replaced by spaces ("social sciences"); this document keeps the
# underscore - regenerate or confirm.
"dataset_name": "economics"
"description": "The following are multiple choice questions (with answers) about economics.\n\
\n"
"group": "mmlu_pro_social_sciences"
"group_alias": "social_sciences"
"include": "_default_template_yaml"
"task": "mmlu_pro_economics"
"task_alias": "economics"
# Per-subject task config: "engineering" subset, grouped under mmlu_pro_stem.
"dataset_name": "engineering"
"description": "The following are multiple choice questions (with answers) about engineering.\n\
\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_engineering"
"task_alias": "engineering"
# Per-subject task config: "health" subset, grouped under mmlu_pro_other.
"dataset_name": "health"
"description": "The following are multiple choice questions (with answers) about health.\n\
\n"
"group": "mmlu_pro_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_pro_health"
"task_alias": "health"
# Per-subject task config: "history" subset, grouped under
# mmlu_pro_humanities.
"dataset_name": "history"
"description": "The following are multiple choice questions (with answers) about history.\n\
\n"
"group": "mmlu_pro_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "mmlu_pro_history"
"task_alias": "history"
# Per-subject task config: "law" subset, grouped under mmlu_pro_humanities.
"dataset_name": "law"
"description": "The following are multiple choice questions (with answers) about law.\n\
\n"
"group": "mmlu_pro_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "mmlu_pro_law"
"task_alias": "law"
# Per-subject task config: "math" subset, grouped under mmlu_pro_stem.
"dataset_name": "math"
"description": "The following are multiple choice questions (with answers) about math.\n\
\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_math"
"task_alias": "math"
# Per-subject task config: "other" subset, grouped under mmlu_pro_other.
# Note that the task name and the group name are the same string here.
"dataset_name": "other"
"description": "The following are multiple choice questions (with answers) about other.\n\
\n"
"group": "mmlu_pro_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_pro_other"
"task_alias": "other"
# Per-subject task config: "philosophy" subset, grouped under
# mmlu_pro_humanities.
"dataset_name": "philosophy"
"description": "The following are multiple choice questions (with answers) about philosophy.\n\
\n"
"group": "mmlu_pro_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "mmlu_pro_philosophy"
"task_alias": "philosophy"
# Per-subject task config: "physics" subset, grouped under mmlu_pro_stem.
"dataset_name": "physics"
"description": "The following are multiple choice questions (with answers) about physics.\n\
\n"
"group": "mmlu_pro_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_pro_physics"
"task_alias": "physics"
# Per-subject task config: "psychology" subset, grouped under
# mmlu_pro_social_sciences.
# NOTE(review): the generator script writes group_alias with underscores
# replaced by spaces ("social sciences"); this document keeps the
# underscore - regenerate or confirm.
"dataset_name": "psychology"
"description": "The following are multiple choice questions (with answers) about psychology.\n\
\n"
"group": "mmlu_pro_social_sciences"
"group_alias": "social_sciences"
"include": "_default_template_yaml"
"task": "mmlu_pro_psychology"
"task_alias": "psychology"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment