Commit bc4b922c authored by Baber's avatar Baber
Browse files

Merge branch 'main' into llama

# Conflicts:
#	lm_eval/tasks/llama3/README.md
parents 748eb47e b2c090cc
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_professional_psychology
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_professional_psychology
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_public_relations
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_public_relations
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_security_studies
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_security_studies
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_sociology
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_sociology
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_us_foreign_policy
tag: global_mmlu_full_en_social_sciences_tasks
task: global_mmlu_full_en_us_foreign_policy
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_virology
tag: global_mmlu_full_en_other_tasks
task: global_mmlu_full_en_virology
# Generated by _generate_configs.py
include: _en_template_yaml
process_docs: !function utils.process_world_religions
tag: global_mmlu_full_en_humanities_tasks
task: global_mmlu_full_en_world_religions
from functools import partial
# The 57 MMLU subject names; one `process_<subject>` filter helper is
# generated per entry (see `process_functions` below) so the generated
# YAML task configs can reference them as `!function utils.process_<subject>`.
SUBJECTS = [
    "abstract_algebra",
    "anatomy",
    "astronomy",
    "business_ethics",
    "clinical_knowledge",
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_medicine",
    "college_physics",
    "computer_security",
    "conceptual_physics",
    "econometrics",
    "electrical_engineering",
    "elementary_mathematics",
    "formal_logic",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_computer_science",
    "high_school_european_history",
    "high_school_geography",
    "high_school_government_and_politics",
    "high_school_macroeconomics",
    "high_school_mathematics",
    "high_school_microeconomics",
    "high_school_physics",
    "high_school_psychology",
    "high_school_statistics",
    "high_school_us_history",
    "high_school_world_history",
    "human_aging",
    "human_sexuality",
    "international_law",
    "jurisprudence",
    "logical_fallacies",
    "machine_learning",
    "management",
    "marketing",
    "medical_genetics",
    "miscellaneous",
    "moral_disputes",
    "moral_scenarios",
    "nutrition",
    "philosophy",
    "prehistory",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_studies",
    "sociology",
    "us_foreign_policy",
    "virology",
    "world_religions",
]
def process_docs(dataset, subject):
    """Keep only the rows of *dataset* whose ``"subject"`` field equals *subject*.

    ``dataset`` is expected to expose a ``.filter(predicate)`` method (e.g. a
    HuggingFace ``datasets.Dataset``); the filtered dataset is returned.
    """

    def _has_subject(doc):
        return doc["subject"] == subject

    return dataset.filter(_has_subject)
# Build one pre-bound filter helper per subject and publish each at module
# scope, so the generated YAML configs can refer to them as
# `!function utils.process_<subject>`.
process_functions = {}
for _subject in SUBJECTS:
    process_functions[f"process_{_subject}"] = partial(process_docs, subject=_subject)
globals().update(process_functions)
dataset_path: CohereForAI/Global-MMLU
dataset_name: es
test_split: test
fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
group: global_mmlu_full_es
task:
- global_mmlu_full_es_stem
- global_mmlu_full_es_other
- global_mmlu_full_es_social_sciences
- global_mmlu_full_es_humanities
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 1.0
group: global_mmlu_full_es_humanities
task:
- global_mmlu_full_es_humanities_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
group: global_mmlu_full_es_other
task:
- global_mmlu_full_es_other_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
group: global_mmlu_full_es_social_sciences
task:
- global_mmlu_full_es_social_sciences_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
group: global_mmlu_full_es_stem
task:
- global_mmlu_full_es_stem_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: true
metadata:
version: 0.0
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_abstract_algebra
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_abstract_algebra
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_anatomy
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_anatomy
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_astronomy
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_astronomy
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_business_ethics
tag: global_mmlu_full_es_other_tasks
task: global_mmlu_full_es_business_ethics
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_clinical_knowledge
tag: global_mmlu_full_es_other_tasks
task: global_mmlu_full_es_clinical_knowledge
# Generated by _generate_configs.py
include: _es_template_yaml
process_docs: !function utils.process_college_biology
tag: global_mmlu_full_es_stem_tasks
task: global_mmlu_full_es_college_biology
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment