update files

5be2bb10 · lintangsutawika · bfbda3b3
Commit 5be2bb10 authored Jun 27, 2024 by lintangsutawika
17 changed files
--- a/lm_eval/tasks/mmlu_pro/_generate_configs.py
+++ b/lm_eval/tasks/mmlu_pro/_generate_configs.py
-"""
-Take in a YAML, and output all "other" splits with this YAML
-"""
-import argparse
-import logging
-import os
-import yaml
-from tqdm import tqdm
-eval_logger = logging.getLogger("lm-eval")
-SUBJECTS = {
-    "business": "other",
-    "law": "humanities",
-    "psychology": "social_sciences",
-    "biology": "stem",
-    "chemistry": "stem",
-    "history": "humanities",
-    "other": "other",
-    "health": "other",
-    "economics": "social_sciences",
-    "math": "stem",
-    "physics": "stem",
-    "computer_science": "stem",
-    "philosophy": "humanities",
-    "engineering": "stem"
-}
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base_yaml_path", required=True)
-    parser.add_argument("--save_prefix_path", default="mmlu_pro")
-    parser.add_argument("--cot_prompt_path", default=None)
-    parser.add_argument("--task_prefix", default="")
-    parser.add_argument("--group_prefix", default="")
-    return parser.parse_args()
-if __name__ == "__main__":
-    args = parse_args()
-    # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
-    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path, encoding="utf-8") as f:
-        base_yaml = yaml.full_load(f)
-    if args.cot_prompt_path is not None:
-        import json
-        with open(args.cot_prompt_path, encoding="utf-8") as f:
-            cot_file = json.load(f)
-    ALL_CATEGORIES = []
-    for subject, category in tqdm(SUBJECTS.items()):
-        if category not in ALL_CATEGORIES:
-            ALL_CATEGORIES.append(category)
-        if args.cot_prompt_path is not None:
-            description = cot_file[subject]
-        else:
-            description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"
-        yaml_dict = {
-            "include": base_yaml_name,
-            "group": f"mmlu_pro_{args.task_prefix}_{category}"
-            if args.task_prefix != ""
-            else f"mmlu_pro_{category}",
-            "group_alias": category.replace("_", " "),
-            "task": f"mmlu_pro_{args.task_prefix}_{subject}"
-            if args.task_prefix != ""
-            else f"mmlu_pro_{subject}",
-            "task_alias": subject.replace("_", " "),
-            "dataset_name": subject,
-            "description": description,
-        }
-        file_save_path = args.save_prefix_path + f"_{subject}.yaml"
-        eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
-        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
-            yaml.dump(
-                yaml_dict,
-                yaml_file,
-                allow_unicode=True,
-                default_style='"',
-            )
-    if args.task_prefix != "":
-        mmlu_pro_subcategories = [
-            f"mmlu_pro_{args.task_prefix}_{category}" for category in ALL_CATEGORIES
-        ]
-    else:
-        mmlu_pro_subcategories = [f"mmlu_pro_{category}" for category in ALL_CATEGORIES]
-    if args.group_prefix != "":
-        file_save_path = args.group_prefix + ".yaml"
-    else:
-        file_save_path = args.save_prefix_path + ".yaml"
-    eval_logger.info(f"Saving benchmark config to {file_save_path}")
-    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
-        yaml.dump(
-            {
-                "group": f"mmlu_pro_{args.task_prefix}"
-                if args.task_prefix != ""
-                else "mmlu_pro",
-                "task": mmlu_pro_subcategories,
-            },
-            yaml_file,
-            indent=4,
-            default_flow_style=False,
-        )
--- a/lm_eval/tasks/mmlu_pro/default/_default_template_yaml
+++ b/lm_eval/tasks/mmlu_pro/default/_default_template_yaml
-dataset_path: sjyuxyz/MMLU-Pro-with-subset
-test_split: test
-fewshot_split: dev
-fewshot_config:
-  sampler: first_n
-output_type: multiple_choice
-doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nE. {{choices[4]}}\nF. {{choices[5]}}\nG. {{choices[6]}}\nH. {{choices[7]}}\nI. {{choices[8]}}\nJ. {{choices[9]}}\nAnswer:"
-doc_to_choice: ["A", "B", "C", "D", "E","F","G","H","I","J"]
-doc_to_target: answer
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
-metadata:
-  version: 0.0
--- a/lm_eval/tasks/mmlu_pro/default/_mmlu_pro.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/_mmlu_pro.yaml
-group: mmlu_pro
-task:
-  - mmlu_pro_stem
-  - mmlu_pro_other
-  - mmlu_pro_social_sciences
-  - mmlu_pro_humanities
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_biology.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_biology.yaml
-"dataset_name": "biology"
-"description": "The following are multiple choice questions (with answers) about biology.\n\
-  \n"
-"group": "mmlu_pro_stem"
-"group_alias": "stem"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_biology"
-"task_alias": "biology"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_business.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_business.yaml
-"dataset_name": "business"
-"description": "The following are multiple choice questions (with answers) about business.\n\
-  \n"
-"group": "mmlu_pro_other"
-"group_alias": "other"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_business"
-"task_alias": "business"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_chemistry.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_chemistry.yaml
-"dataset_name": "math"
-"description": "The following are multiple choice questions (with answers) about math.\n\
-  \n"
-"group": "mmlu_pro_stem"
-"group_alias": "stem"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_math"
-"task_alias": "math"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_computer_science.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_computer_science.yaml
-"dataset_name": "computer_science"
-"description": "The following are multiple choice questions (with answers) about computer_science.\n\
-  \n"
-"group": "mmlu_pro_stem"
-"group_alias": "stem"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_computer_science"
-"task_alias": "computer_science"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_economics.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_economics.yaml
-"dataset_name": "economics"
-"description": "The following are multiple choice questions (with answers) about economics.\n\
-  \n"
-"group": "mmlu_pro_social_sciences"
-"group_alias": "social_sciences"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_economics"
-"task_alias": "economics"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_engineering.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_engineering.yaml
-"dataset_name": "engineering"
-"description": "The following are multiple choice questions (with answers) about engineering.\n\
-  \n"
-"group": "mmlu_pro_stem"
-"group_alias": "stem"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_engineering"
-"task_alias": "engineering"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_health.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_health.yaml
-"dataset_name": "health"
-"description": "The following are multiple choice questions (with answers) about health.\n\
-  \n"
-"group": "mmlu_pro_other"
-"group_alias": "other"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_health"
-"task_alias": "health"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_history.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_history.yaml
-"dataset_name": "history"
-"description": "The following are multiple choice questions (with answers) about history.\n\
-  \n"
-"group": "mmlu_pro_humanities"
-"group_alias": "humanities"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_history"
-"task_alias": "history"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_law.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_law.yaml
-"dataset_name": "law"
-"description": "The following are multiple choice questions (with answers) about law.\n\
-  \n"
-"group": "mmlu_pro_humanities"
-"group_alias": "humanities"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_law"
-"task_alias": "law"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_math.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_math.yaml
-"dataset_name": "math"
-"description": "The following are multiple choice questions (with answers) about math.\n\
-  \n"
-"group": "mmlu_pro_stem"
-"group_alias": "stem"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_math"
-"task_alias": "math"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_other.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_other.yaml
-"dataset_name": "other"
-"description": "The following are multiple choice questions (with answers) about other.\n\
-  \n"
-"group": "mmlu_pro_other"
-"group_alias": "other"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_other"
-"task_alias": "other"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_philosophy.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_philosophy.yaml
-"dataset_name": "philosophy"
-"description": "The following are multiple choice questions (with answers) about philosophy.\n\
-  \n"
-"group": "mmlu_pro_humanities"
-"group_alias": "humanities"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_philosophy"
-"task_alias": "philosophy"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_physics.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_physics.yaml
-"dataset_name": "physics"
-"description": "The following are multiple choice questions (with answers) about physics.\n\
-  \n"
-"group": "mmlu_pro_stem"
-"group_alias": "stem"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_physics"
-"task_alias": "physics"
--- a/lm_eval/tasks/mmlu_pro/default/mmlu_pro_psychology.yaml
+++ b/lm_eval/tasks/mmlu_pro/default/mmlu_pro_psychology.yaml
-"dataset_name": "psychology"
-"description": "The following are multiple choice questions (with answers) about psychology.\n\
-  \n"
-"group": "mmlu_pro_social_sciences"
-"group_alias": "social_sciences"
-"include": "_default_template_yaml"
-"task": "mmlu_pro_psychology"
-"task_alias": "psychology"