Commit 92f25463 authored by lintangsutawika
Browse files

modified to add subcategory

parent afda3d22
""" """
Take in a YAML, and output all other splits with this YAML Take in a YAML, and output all "other" splits with this YAML
""" """
import os import os
import yaml import yaml
...@@ -10,65 +10,65 @@ from tqdm import tqdm ...@@ -10,65 +10,65 @@ from tqdm import tqdm
from lm_eval import utils from lm_eval import utils
from lm_eval.logger import eval_logger from lm_eval.logger import eval_logger
SUBJECTS = [ SUBJECTS = {
"abstract_algebra", "abstract_algebra": "stem",
"anatomy", "anatomy": "stem",
"astronomy", "astronomy": "stem",
"business_ethics", "business_ethics": "other",
"clinical_knowledge", "clinical_knowledge": "other",
"college_biology", "college_biology": "stem",
"college_chemistry", "college_chemistry": "stem",
"college_computer_science", "college_computer_science": "stem",
"college_mathematics", "college_mathematics": "stem",
"college_medicine", "college_medicine": "other",
"college_physics", "college_physics": "stem",
"computer_security", "computer_security": "stem",
"conceptual_physics", "conceptual_physics": "stem",
"econometrics", "econometrics": "social_sciences",
"electrical_engineering", "electrical_engineering": "stem",
"elementary_mathematics", "elementary_mathematics": "stem",
"formal_logic", "formal_logic": "humanities",
"global_facts", "global_facts": "other",
"high_school_biology", "high_school_biology": "stem",
"high_school_chemistry", "high_school_chemistry": "stem",
"high_school_computer_science", "high_school_computer_science": "stem",
"high_school_european_history", "high_school_european_history": "humanities",
"high_school_geography", "high_school_geography": "social_sciences",
"high_school_government_and_politics", "high_school_government_and_politics": "social_sciences",
"high_school_macroeconomics", "high_school_macroeconomics": "social_sciences",
"high_school_mathematics", "high_school_mathematics": "stem",
"high_school_microeconomics", "high_school_microeconomics": "social_sciences",
"high_school_physics", "high_school_physics": "stem",
"high_school_psychology", "high_school_psychology": "social_sciences",
"high_school_statistics", "high_school_statistics": "stem",
"high_school_us_history", "high_school_us_history": "humanities",
"high_school_world_history", "high_school_world_history": "humanities",
"human_aging", "human_aging": "other",
"human_sexuality", "human_sexuality": "social_sciences",
"international_law", "international_law": "humanities",
"jurisprudence", "jurisprudence": "humanities",
"logical_fallacies", "logical_fallacies": "humanities",
"machine_learning", "machine_learning": "stem",
"management", "management": "other",
"marketing", "marketing": "other",
"medical_genetics", "medical_genetics": "other",
"miscellaneous", "miscellaneous": "other",
"moral_disputes", "moral_disputes": "humanities",
"moral_scenarios", "moral_scenarios": "humanities",
"nutrition", "nutrition": "other",
"philosophy", "philosophy": "humanities",
"prehistory", "prehistory": "humanities",
"professional_accounting", "professional_accounting": "other",
"professional_law", "professional_law": "humanities",
"professional_medicine", "professional_medicine": "other",
"professional_psychology", "professional_psychology": "social_sciences",
"public_relations", "public_relations": "social_sciences",
"security_studies", "security_studies": "social_sciences",
"sociology", "sociology": "social_sciences",
"us_foreign_policy", "us_foreign_policy": "social_sciences",
"virology", "virology": "other",
"world_religions", "world_religions": "humanities",
] }
def parse_args(): def parse_args():
...@@ -77,6 +77,7 @@ def parse_args(): ...@@ -77,6 +77,7 @@ def parse_args():
parser.add_argument("--save_prefix_path", default="flan") parser.add_argument("--save_prefix_path", default="flan")
parser.add_argument("--cot_prompt_path", default=None) parser.add_argument("--cot_prompt_path", default=None)
parser.add_argument("--task_prefix", default="") parser.add_argument("--task_prefix", default="")
parser.add_argument("--group_prefix", default="")
return parser.parse_args() return parser.parse_args()
...@@ -84,7 +85,7 @@ if __name__ == "__main__": ...@@ -84,7 +85,7 @@ if __name__ == "__main__":
args = parse_args() args = parse_args()
# get filename of base_yaml so we can `"include": ` it in our other YAMLs. # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1] base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f: with open(args.base_yaml_path) as f:
base_yaml = yaml.full_load(f) base_yaml = yaml.full_load(f)
...@@ -95,7 +96,12 @@ if __name__ == "__main__": ...@@ -95,7 +96,12 @@ if __name__ == "__main__":
with open(args.cot_prompt_path) as f: with open(args.cot_prompt_path) as f:
cot_file = json.load(f) cot_file = json.load(f)
for subject in tqdm(SUBJECTS): ALL_CATEGORIES = []
for subject, category in tqdm(SUBJECTS.items()):
if category not in ALL_CATEGORIES:
ALL_CATEGORIES.append(category)
if args.cot_prompt_path is not None: if args.cot_prompt_path is not None:
description = cot_file[subject] description = cot_file[subject]
else: else:
...@@ -103,6 +109,7 @@ if __name__ == "__main__": ...@@ -103,6 +109,7 @@ if __name__ == "__main__":
yaml_dict = { yaml_dict = {
"include": base_yaml_name, "include": base_yaml_name,
"group": f"mmlu_{category}",
"task": f"mmlu_{args.task_prefix}_{subject}" "task": f"mmlu_{args.task_prefix}_{subject}"
if args.task_prefix != "" if args.task_prefix != ""
else f"mmlu_{subject}", else f"mmlu_{subject}",
...@@ -120,3 +127,18 @@ if __name__ == "__main__": ...@@ -120,3 +127,18 @@ if __name__ == "__main__":
allow_unicode=True, allow_unicode=True,
default_style='"', default_style='"',
) )
# Write the top-level benchmark config that lists one `mmlu_<category>` entry
# for every category collected in ALL_CATEGORIES.
#
# Both the output file name and the group name depend on --group_prefix:
#   * no prefix  -> "<save_prefix_path>.yaml",          group "mmlu"
#   * prefix "x" -> "<save_prefix_path>_x.yaml",        group "mmlu_x"
if args.group_prefix == "":
    file_save_path = args.save_prefix_path + ".yaml"
    # Formatting an empty prefix into f"mmlu_{...}" would yield "mmlu_" with a
    # dangling underscore, so the un-prefixed group is spelled out explicitly.
    group_name = "mmlu"
else:
    file_save_path = args.save_prefix_path + f"_{args.group_prefix}.yaml"
    group_name = f"mmlu_{args.group_prefix}"

eval_logger.info(f"Saving benchmark config to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
    yaml.dump(
        {
            "group": group_name,
            "task": [f"mmlu_{category}" for category in ALL_CATEGORIES],
        },
        yaml_file,
        default_flow_style=False,
    )
group: mmlu
dataset_path: cais/mmlu dataset_path: cais/mmlu
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
......
group: mmlu
task:
- mmlu_stem
- mmlu_other
- mmlu_social_sciences
- mmlu_humanities
"dataset_name": "abstract_algebra" "dataset_name": "abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_abstract_algebra" "task": "mmlu_abstract_algebra"
"dataset_name": "anatomy" "dataset_name": "anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" "description": "The following are multiple choice questions (with answers) about anatomy.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_anatomy" "task": "mmlu_anatomy"
"dataset_name": "astronomy" "dataset_name": "astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" "description": "The following are multiple choice questions (with answers) about astronomy.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_astronomy" "task": "mmlu_astronomy"
"dataset_name": "business_ethics" "dataset_name": "business_ethics"
"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" "description": "The following are multiple choice questions (with answers) about business ethics.\n\n"
"group": "mmlu_other"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_business_ethics" "task": "mmlu_business_ethics"
"dataset_name": "clinical_knowledge" "dataset_name": "clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n"
"group": "mmlu_other"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_clinical_knowledge" "task": "mmlu_clinical_knowledge"
"dataset_name": "college_biology" "dataset_name": "college_biology"
"description": "The following are multiple choice questions (with answers) about college biology.\n\n" "description": "The following are multiple choice questions (with answers) about college biology.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_college_biology" "task": "mmlu_college_biology"
"dataset_name": "college_chemistry" "dataset_name": "college_chemistry"
"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_college_chemistry" "task": "mmlu_college_chemistry"
"dataset_name": "college_computer_science" "dataset_name": "college_computer_science"
"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" "description": "The following are multiple choice questions (with answers) about college computer science.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_college_computer_science" "task": "mmlu_college_computer_science"
"dataset_name": "college_mathematics" "dataset_name": "college_mathematics"
"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_college_mathematics" "task": "mmlu_college_mathematics"
"dataset_name": "college_medicine" "dataset_name": "college_medicine"
"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" "description": "The following are multiple choice questions (with answers) about college medicine.\n\n"
"group": "mmlu_other"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_college_medicine" "task": "mmlu_college_medicine"
"dataset_name": "college_physics" "dataset_name": "college_physics"
"description": "The following are multiple choice questions (with answers) about college physics.\n\n" "description": "The following are multiple choice questions (with answers) about college physics.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_college_physics" "task": "mmlu_college_physics"
"dataset_name": "computer_security" "dataset_name": "computer_security"
"description": "The following are multiple choice questions (with answers) about computer security.\n\n" "description": "The following are multiple choice questions (with answers) about computer security.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_computer_security" "task": "mmlu_computer_security"
"dataset_name": "conceptual_physics" "dataset_name": "conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_conceptual_physics" "task": "mmlu_conceptual_physics"
"dataset_name": "econometrics" "dataset_name": "econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" "description": "The following are multiple choice questions (with answers) about econometrics.\n\n"
"group": "mmlu_social_sciences"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_econometrics" "task": "mmlu_econometrics"
"dataset_name": "electrical_engineering" "dataset_name": "electrical_engineering"
"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_electrical_engineering" "task": "mmlu_electrical_engineering"
"dataset_name": "elementary_mathematics" "dataset_name": "elementary_mathematics"
"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n"
"group": "mmlu_stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_elementary_mathematics" "task": "mmlu_elementary_mathematics"
"dataset_name": "formal_logic" "dataset_name": "formal_logic"
"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" "description": "The following are multiple choice questions (with answers) about formal logic.\n\n"
"group": "mmlu_humanities"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "mmlu_formal_logic" "task": "mmlu_formal_logic"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment