modified to add subcategory

92f25463 · lintangsutawika · afda3d22 · 92f25463 · 92f25463 · 92f25463
Commit 92f25463 authored Oct 16, 2023 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/mmlu/_generate_configs.py
+++ b/lm_eval/tasks/mmlu/_generate_configs.py
 """
-Take in a YAML, and output all other splits with this YAML
+Take in a YAML, and output all "other" splits with this YAML
 """
 import os
 import yaml
@@ -10,65 +10,65 @@ from tqdm import tqdm
 from lm_eval import utils
 from lm_eval.logger import eval_logger
-SUBJECTS = [
+SUBJECTS = {
-    "abstract_algebra",
+    "abstract_algebra": "stem",
-    "anatomy",
+    "anatomy": "stem",
-    "astronomy",
+    "astronomy": "stem",
-    "business_ethics",
+    "business_ethics": "other",
-    "clinical_knowledge",
+    "clinical_knowledge": "other",
-    "college_biology",
+    "college_biology": "stem",
-    "college_chemistry",
+    "college_chemistry": "stem",
-    "college_computer_science",
+    "college_computer_science": "stem",
-    "college_mathematics",
+    "college_mathematics": "stem",
-    "college_medicine",
+    "college_medicine": "other",
-    "college_physics",
+    "college_physics": "stem",
-    "computer_security",
+    "computer_security": "stem",
-    "conceptual_physics",
+    "conceptual_physics": "stem",
-    "econometrics",
+    "econometrics": "social_sciences",
-    "electrical_engineering",
+    "electrical_engineering": "stem",
-    "elementary_mathematics",
+    "elementary_mathematics": "stem",
-    "formal_logic",
+    "formal_logic": "humanities",
-    "global_facts",
+    "global_facts": "other",
-    "high_school_biology",
+    "high_school_biology": "stem",
-    "high_school_chemistry",
+    "high_school_chemistry": "stem",
-    "high_school_computer_science",
+    "high_school_computer_science": "stem",
-    "high_school_european_history",
+    "high_school_european_history": "humanities",
-    "high_school_geography",
+    "high_school_geography": "social_sciences",
-    "high_school_government_and_politics",
+    "high_school_government_and_politics": "social_sciences",
-    "high_school_macroeconomics",
+    "high_school_macroeconomics": "social_sciences",
-    "high_school_mathematics",
+    "high_school_mathematics": "stem",
-    "high_school_microeconomics",
+    "high_school_microeconomics": "social_sciences",
-    "high_school_physics",
+    "high_school_physics": "stem",
-    "high_school_psychology",
+    "high_school_psychology": "social_sciences",
-    "high_school_statistics",
+    "high_school_statistics": "stem",
-    "high_school_us_history",
+    "high_school_us_history": "humanities",
-    "high_school_world_history",
+    "high_school_world_history": "humanities",
-    "human_aging",
+    "human_aging": "other",
-    "human_sexuality",
+    "human_sexuality": "social_sciences",
-    "international_law",
+    "international_law": "humanities",
-    "jurisprudence",
+    "jurisprudence": "humanities",
-    "logical_fallacies",
+    "logical_fallacies": "humanities",
-    "machine_learning",
+    "machine_learning": "stem",
-    "management",
+    "management": "other",
-    "marketing",
+    "marketing": "other",
-    "medical_genetics",
+    "medical_genetics": "other",
-    "miscellaneous",
+    "miscellaneous": "other",
-    "moral_disputes",
+    "moral_disputes": "humanities",
-    "moral_scenarios",
+    "moral_scenarios": "humanities",
-    "nutrition",
+    "nutrition": "other",
-    "philosophy",
+    "philosophy": "humanities",
-    "prehistory",
+    "prehistory": "humanities",
-    "professional_accounting",
+    "professional_accounting": "other",
-    "professional_law",
+    "professional_law": "humanities",
-    "professional_medicine",
+    "professional_medicine": "other",
-    "professional_psychology",
+    "professional_psychology": "social_sciences",
-    "public_relations",
+    "public_relations": "social_sciences",
-    "security_studies",
+    "security_studies": "social_sciences",
-    "sociology",
+    "sociology": "social_sciences",
-    "us_foreign_policy",
+    "us_foreign_policy": "social_sciences",
-    "virology",
+    "virology": "other",
-    "world_religions",
+    "world_religions": "humanities",
-]
+}
 def parse_args():
@@ -77,6 +77,7 @@ def parse_args():
    parser.add_argument("--save_prefix_path", default="flan")
    parser.add_argument("--cot_prompt_path", default=None)
    parser.add_argument("--task_prefix", default="")
+    parser.add_argument("--group_prefix", default="")
    return parser.parse_args()
@@ -84,7 +85,7 @@ if __name__ == "__main__":
    args = parse_args()
-    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
+    # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
    with open(args.base_yaml_path) as f:
        base_yaml = yaml.full_load(f)
@@ -95,7 +96,12 @@ if __name__ == "__main__":
        with open(args.cot_prompt_path) as f:
            cot_file = json.load(f)
-    for subject in tqdm(SUBJECTS):
+    ALL_CATEGORIES = []
+    for subject, category in tqdm(SUBJECTS.items()):
+        if category not in ALL_CATEGORIES:
+            ALL_CATEGORIES.append(category)
        if args.cot_prompt_path is not None:
            description = cot_file[subject]
        else:
@@ -103,6 +109,7 @@ if __name__ == "__main__":
        yaml_dict = {
            "include": base_yaml_name,
+            "group": f"mmlu_{category}",
            "task": f"mmlu_{args.task_prefix}_{subject}"
            if args.task_prefix != ""
            else f"mmlu_{subject}",
@@ -120,3 +127,18 @@ if __name__ == "__main__":
                allow_unicode=True,
                default_style='"',
            )
+    if args.group_prefix == "":
+        file_save_path = args.save_prefix_path + ".yaml"
+    else:
+        file_save_path = args.save_prefix_path + f"_{args.group_prefix}.yaml"
+    eval_logger.info(f"Saving benchmark config to {file_save_path}")
+    with open(file_save_path, "w") as yaml_file:
+        yaml.dump(
+            {
+                "group": f"mmlu_{args.group_prefix}",
+                "task": [f"mmlu_{category}" for category in ALL_CATEGORIES]
+                },
+            yaml_file,
+            default_flow_style=False
+        )
--- a/lm_eval/tasks/mmlu/default/_default_template_yaml
+++ b/lm_eval/tasks/mmlu/default/_default_template_yaml
-group: mmlu
 dataset_path: cais/mmlu
 test_split: test
 fewshot_split: dev

--- a/lm_eval/tasks/mmlu/default/mmlu.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu.yaml
+group: mmlu
+task:
+  - mmlu_stem
+  - mmlu_other
+  - mmlu_social_sciences
+  - mmlu_humanities
--- a/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml
 "dataset_name": "abstract_algebra"
 "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_abstract_algebra"
--- a/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml
 "dataset_name": "anatomy"
 "description": "The following are multiple choice questions (with answers) about anatomy.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_anatomy"
--- a/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml
 "dataset_name": "astronomy"
 "description": "The following are multiple choice questions (with answers) about astronomy.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_astronomy"
--- a/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml
 "dataset_name": "business_ethics"
 "description": "The following are multiple choice questions (with answers) about business ethics.\n\n"
+"group": "mmlu_other"
 "include": "_default_template_yaml"
 "task": "mmlu_business_ethics"
--- a/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml
 "dataset_name": "clinical_knowledge"
 "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n"
+"group": "mmlu_other"
 "include": "_default_template_yaml"
 "task": "mmlu_clinical_knowledge"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml
 "dataset_name": "college_biology"
 "description": "The following are multiple choice questions (with answers) about college biology.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_college_biology"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml
 "dataset_name": "college_chemistry"
 "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_college_chemistry"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml
 "dataset_name": "college_computer_science"
 "description": "The following are multiple choice questions (with answers) about college computer science.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_college_computer_science"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml
 "dataset_name": "college_mathematics"
 "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_college_mathematics"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml
 "dataset_name": "college_medicine"
 "description": "The following are multiple choice questions (with answers) about college medicine.\n\n"
+"group": "mmlu_other"
 "include": "_default_template_yaml"
 "task": "mmlu_college_medicine"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml
 "dataset_name": "college_physics"
 "description": "The following are multiple choice questions (with answers) about college physics.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_college_physics"
--- a/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml
 "dataset_name": "computer_security"
 "description": "The following are multiple choice questions (with answers) about computer security.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_computer_security"
--- a/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml
 "dataset_name": "conceptual_physics"
 "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_conceptual_physics"
--- a/lm_eval/tasks/mmlu/default/mmlu_econometrics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_econometrics.yaml
 "dataset_name": "econometrics"
 "description": "The following are multiple choice questions (with answers) about econometrics.\n\n"
+"group": "mmlu_social_sciences"
 "include": "_default_template_yaml"
 "task": "mmlu_econometrics"
--- a/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml
 "dataset_name": "electrical_engineering"
 "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_electrical_engineering"
--- a/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml
 "dataset_name": "elementary_mathematics"
 "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n"
+"group": "mmlu_stem"
 "include": "_default_template_yaml"
 "task": "mmlu_elementary_mathematics"
--- a/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml
 "dataset_name": "formal_logic"
 "description": "The following are multiple choice questions (with answers) about formal logic.\n\n"
+"group": "mmlu_humanities"
 "include": "_default_template_yaml"
 "task": "mmlu_formal_logic"