merge conflict

470059f6 · lintangsutawika · b8d7d6c3 · 9d030712 · 470059f6 · 470059f6
Commit 470059f6 authored Nov 24, 2023 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/logiqa2/logieval.yaml
+++ b/lm_eval/tasks/logiqa2/logieval.yaml
 task: logieval
 dataset_path: baber/logiqa2
 dataset_name: logieval
-output_type: greedy_until
+output_type: generate_until
 training_split: train
 test_split: test
 # Instructions + {content}

--- a/lm_eval/tasks/mgsm/direct/direct_yaml
+++ b/lm_eval/tasks/mgsm/direct/direct_yaml
@@ -4,7 +4,7 @@
 group: mgsm_direct
 dataset_path: juletxara/mgsm
 dataset_name: null  # Overridden by language-specific config.
-output_type: greedy_until
+output_type: generate_until
 training_split: train
 test_split: test
 target_delimiter: ""

--- a/lm_eval/tasks/mgsm/en_cot/cot_yaml
+++ b/lm_eval/tasks/mgsm/en_cot/cot_yaml
@@ -4,7 +4,7 @@
 group: mgsm_cot_native
 dataset_path: juletxara/mgsm
 dataset_name: null  # Overridden by language-specific config.
-output_type: greedy_until
+output_type: generate_until
 training_split: train
 test_split: test
 target_delimiter: ""

--- a/lm_eval/tasks/mgsm/native_cot/cot_yaml
+++ b/lm_eval/tasks/mgsm/native_cot/cot_yaml
@@ -4,7 +4,7 @@
 group: mgsm_cot_native
 dataset_path: juletxara/mgsm
 dataset_name: null  # Overridden by language-specific config.
-output_type: greedy_until
+output_type: generate_until
 training_split: train
 test_split: test
 target_delimiter: ""

--- a/lm_eval/tasks/minerva_math/README.md
+++ b/lm_eval/tasks/minerva_math/README.md
@@ -37,7 +37,7 @@ Eprint = {arXiv:2206.14858},
 #### Groups
 - `math_word_problems`
-- `greedy_until`
+- `generate_until`
 #### Tasks

--- a/lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
+++ b/lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
@@ -4,7 +4,7 @@ task: minerva_math_algebra
 dataset_path: EleutherAI/hendrycks_math
 process_docs: !function utils.process_docs
 dataset_name: algebra
-output_type: greedy_until
+output_type: generate_until
 training_split: train
 test_split: test
 doc_to_text:  !function utils.doc_to_text

--- a/lm_eval/tasks/minerva_math/utils.py
+++ b/lm_eval/tasks/minerva_math/utils.py
 import datasets
 import re
 import signal
-from lm_eval.logger import eval_logger
+from lm_eval.utils import eval_logger
 from typing import Optional, List, Dict
 try:

--- a/lm_eval/tasks/mmlu/_generate_configs.py
+++ b/lm_eval/tasks/mmlu/_generate_configs.py
+"""
+Take in a YAML, and output all "other" splits with this YAML
+"""
+import os
+import yaml
+import argparse
+from tqdm import tqdm
+from lm_eval import utils
+from lm_eval.logger import eval_logger
+# Maps each MMLU subject name to the category it is grouped under
+# ("stem", "humanities", "social_sciences" or "other"). The script below
+# writes one YAML file per key, using the key as "dataset_name" and the
+# value to build the "group" name.
+SUBJECTS = {
+    "abstract_algebra": "stem",
+    "anatomy": "stem",
+    "astronomy": "stem",
+    "business_ethics": "other",
+    "clinical_knowledge": "other",
+    "college_biology": "stem",
+    "college_chemistry": "stem",
+    "college_computer_science": "stem",
+    "college_mathematics": "stem",
+    "college_medicine": "other",
+    "college_physics": "stem",
+    "computer_security": "stem",
+    "conceptual_physics": "stem",
+    "econometrics": "social_sciences",
+    "electrical_engineering": "stem",
+    "elementary_mathematics": "stem",
+    "formal_logic": "humanities",
+    "global_facts": "other",
+    "high_school_biology": "stem",
+    "high_school_chemistry": "stem",
+    "high_school_computer_science": "stem",
+    "high_school_european_history": "humanities",
+    "high_school_geography": "social_sciences",
+    "high_school_government_and_politics": "social_sciences",
+    "high_school_macroeconomics": "social_sciences",
+    "high_school_mathematics": "stem",
+    "high_school_microeconomics": "social_sciences",
+    "high_school_physics": "stem",
+    "high_school_psychology": "social_sciences",
+    "high_school_statistics": "stem",
+    "high_school_us_history": "humanities",
+    "high_school_world_history": "humanities",
+    "human_aging": "other",
+    "human_sexuality": "social_sciences",
+    "international_law": "humanities",
+    "jurisprudence": "humanities",
+    "logical_fallacies": "humanities",
+    "machine_learning": "stem",
+    "management": "other",
+    "marketing": "other",
+    "medical_genetics": "other",
+    "miscellaneous": "other",
+    "moral_disputes": "humanities",
+    "moral_scenarios": "humanities",
+    "nutrition": "other",
+    "philosophy": "humanities",
+    "prehistory": "humanities",
+    "professional_accounting": "other",
+    "professional_law": "humanities",
+    "professional_medicine": "other",
+    "professional_psychology": "social_sciences",
+    "public_relations": "social_sciences",
+    "security_studies": "social_sciences",
+    "sociology": "social_sciences",
+    "us_foreign_policy": "social_sciences",
+    "virology": "other",
+    "world_religions": "humanities",
+}
+def parse_args():
+    """Parse the command-line options of this config generator.
+
+    ``--base_yaml_path`` is mandatory. ``--save_prefix_path`` falls back to
+    ``"mmlu"``, ``--cot_prompt_path`` to ``None``, and ``--task_prefix`` /
+    ``--group_prefix`` to the empty string.
+
+    Returns:
+        The ``argparse.Namespace`` built from the process arguments.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--base_yaml_path", required=True)
+    # Remaining options are all plain string flags that differ only in their
+    # default, so register them from a table (same order as before).
+    defaulted_flags = (
+        ("--save_prefix_path", "mmlu"),
+        ("--cot_prompt_path", None),
+        ("--task_prefix", ""),
+        ("--group_prefix", ""),
+    )
+    for flag, fallback in defaulted_flags:
+        cli.add_argument(flag, default=fallback)
+    return cli.parse_args()
+if __name__ == "__main__":
+    # Script entry point: write one YAML config per MMLU subject, then one
+    # benchmark-level YAML listing every category group.
+    args = parse_args()
+    # Only the file name (not the full path) goes into each per-subject
+    # YAML's "include" key.
+    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
+    # The parsed template is not used further down; loading it still fails
+    # fast when the path is missing or the file is not parseable YAML.
+    with open(args.base_yaml_path, encoding="utf-8") as f:
+        base_yaml = yaml.full_load(f)
+    if args.cot_prompt_path is not None:
+        import json
+        # JSON object indexed by subject name; each value becomes that
+        # subject's "description".
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
+            cot_file = json.load(f)
+    # Common stem of every generated group/task name:
+    # "mmlu_<task_prefix>" when a prefix was given, otherwise "mmlu".
+    name_stem = f"mmlu_{args.task_prefix}" if args.task_prefix != "" else "mmlu"
+    # Distinct categories in first-seen order (dicts preserve insertion order).
+    ALL_CATEGORIES = list(dict.fromkeys(SUBJECTS.values()))
+    # NOTE(review): eval_logger is imported from `lm_eval.logger` at the top of
+    # this file, while another file in this same change switches to
+    # `lm_eval.utils` -- confirm the import still resolves.
+    for subject, category in tqdm(SUBJECTS.items()):
+        if args.cot_prompt_path is not None:
+            description = cot_file[subject]
+        else:
+            description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"
+        yaml_dict = {
+            "include": base_yaml_name,
+            "group": f"{name_stem}_{category}",
+            "group_alias": category.replace("_", " "),
+            "task": f"{name_stem}_{subject}",
+            "task_alias": subject.replace("_", " "),
+            "dataset_name": subject,
+            "description": description,
+        }
+        file_save_path = args.save_prefix_path + f"_{subject}.yaml"
+        eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
+        # allow_unicode=True emits non-ASCII characters verbatim, so the file
+        # encoding is pinned to UTF-8 instead of relying on the locale default.
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
+            yaml.dump(
+                yaml_dict,
+                yaml_file,
+                allow_unicode=True,
+                default_style='"',
+            )
+    mmlu_subcategories = [f"{name_stem}_{category}" for category in ALL_CATEGORIES]
+    # The benchmark-level file is named after --group_prefix when given,
+    # otherwise after --save_prefix_path.
+    if args.group_prefix != "":
+        file_save_path = args.group_prefix + ".yaml"
+    else:
+        file_save_path = args.save_prefix_path + ".yaml"
+    eval_logger.info(f"Saving benchmark config to {file_save_path}")
+    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
+        yaml.dump(
+            {
+                "group": name_stem,
+                "task": mmlu_subcategories,
+            },
+            yaml_file,
+            indent=4,
+            default_flow_style=False,
+        )
--- a/lm_eval/tasks/mmlu/default/_default_template_yaml
+++ b/lm_eval/tasks/mmlu/default/_default_template_yaml
+dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
+test_split: test
+fewshot_split: dev
+fewshot_config:
+  sampler: first_n
+output_type: multiple_choice
+doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
+doc_to_choice: ["A", "B", "C", "D"]
+doc_to_target: answer
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
--- a/lm_eval/tasks/mmlu/default/_mmlu.yaml
+++ b/lm_eval/tasks/mmlu/default/_mmlu.yaml
+group: mmlu
+task:
+  - mmlu_stem
+  - mmlu_other
+  - mmlu_social_sciences
+  - mmlu_humanities
--- a/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml
+"dataset_name": "abstract_algebra"
+"description": "The following are multiple choice questions (with answers) about abstract\
+  \ algebra.\n\n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_abstract_algebra"
+"task_alias": "abstract_algebra"
--- a/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml
+"dataset_name": "anatomy"
+"description": "The following are multiple choice questions (with answers) about anatomy.\n\
+  \n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_anatomy"
+"task_alias": "anatomy"
--- a/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml
+"dataset_name": "astronomy"
+"description": "The following are multiple choice questions (with answers) about astronomy.\n\
+  \n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_astronomy"
+"task_alias": "astronomy"
--- a/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml
+"dataset_name": "business_ethics"
+"description": "The following are multiple choice questions (with answers) about business\
+  \ ethics.\n\n"
+"group": "mmlu_other"
+"group_alias": "other"
+"include": "_default_template_yaml"
+"task": "mmlu_business_ethics"
+"task_alias": "business_ethics"
--- a/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml
+"dataset_name": "clinical_knowledge"
+"description": "The following are multiple choice questions (with answers) about clinical\
+  \ knowledge.\n\n"
+"group": "mmlu_other"
+"group_alias": "other"
+"include": "_default_template_yaml"
+"task": "mmlu_clinical_knowledge"
+"task_alias": "clinical_knowledge"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml
+"dataset_name": "college_biology"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ biology.\n\n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_college_biology"
+"task_alias": "college_biology"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml
+"dataset_name": "college_chemistry"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ chemistry.\n\n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_college_chemistry"
+"task_alias": "college_chemistry"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml
+"dataset_name": "college_computer_science"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ computer science.\n\n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_college_computer_science"
+"task_alias": "college_computer_science"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml
+"dataset_name": "college_mathematics"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ mathematics.\n\n"
+"group": "mmlu_stem"
+"group_alias": "stem"
+"include": "_default_template_yaml"
+"task": "mmlu_college_mathematics"
+"task_alias": "college_mathematics"
--- a/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml
+++ b/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml
+"dataset_name": "college_medicine"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ medicine.\n\n"
+"group": "mmlu_other"
+"group_alias": "other"
+"include": "_default_template_yaml"
+"task": "mmlu_college_medicine"
+"task_alias": "college_medicine"