Merge branch 'main' into ai2d

89b6bdb3 · Baber · 59053d58 · 144a1e58 · 89b6bdb3 · 89b6bdb3
Commit 89b6bdb3 authored Feb 06, 2025 by Baber
20 changed files
--- a/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml
+# Shared settings for the global_mmlu_zh_* task files, which pull this file in
+# through their `include:` key.
+dataset_path: CohereForAI/Global-MMLU-Lite
+dataset_name: zh
+# Scored on the test split; few-shot examples are drawn from the dev split.
+test_split: test
+fewshot_split: dev
+fewshot_config:
+  sampler: default
+output_type: multiple_choice
+# Prompt layout: the stripped question, options A-D on separate lines, then "Answer:".
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
+doc_to_choice: ["A", "B", "C", "D"]
+# Gold label is read from the `answer` field (presumably one of the letters
+# above - confirm against the dataset card).
+doc_to_target: answer
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml
+# Generated by _generate_configs.py
+# Business slice of the zh task: shared settings come from _zh_template_yaml;
+# utils.process_business keeps rows whose subject_category is "Business".
+include: _zh_template_yaml
+process_docs: !function utils.process_business
+task: global_mmlu_zh_business
--- a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml
+# Generated by _generate_configs.py
+# Humanities slice of the zh task: shared settings come from _zh_template_yaml;
+# utils.process_humanities keeps rows whose subject_category is "Humanities".
+include: _zh_template_yaml
+process_docs: !function utils.process_humanities
+task: global_mmlu_zh_humanities
--- a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml
+# Generated by _generate_configs.py
+# Medical slice of the zh task: shared settings come from _zh_template_yaml;
+# utils.process_medical keeps rows whose subject_category is "Medical".
+include: _zh_template_yaml
+process_docs: !function utils.process_medical
+task: global_mmlu_zh_medical
--- a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml
+# Generated by _generate_configs.py
+# "Other" slice of the zh task: shared settings come from _zh_template_yaml;
+# utils.process_other keeps rows whose subject_category is "Other".
+include: _zh_template_yaml
+process_docs: !function utils.process_other
+task: global_mmlu_zh_other
--- a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml
+# Generated by _generate_configs.py
+# Social-sciences slice of the zh task: shared settings come from
+# _zh_template_yaml; utils.process_social_sciences keeps rows whose
+# subject_category is "Social Sciences".
+include: _zh_template_yaml
+process_docs: !function utils.process_social_sciences
+task: global_mmlu_zh_social_sciences
--- a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml
+++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml
+# Generated by _generate_configs.py
+# STEM slice of the zh task: shared settings come from _zh_template_yaml;
+# utils.process_stem keeps rows whose subject_category is "STEM".
+include: _zh_template_yaml
+process_docs: !function utils.process_stem
+task: global_mmlu_zh_stem
--- a/lm_eval/tasks/global_mmlu/default/zh/utils.py
+++ b/lm_eval/tasks/global_mmlu/default/zh/utils.py
+from functools import partial
+
+
+# Values of the "subject_category" field that process_docs filters on; each one
+# also yields a generated process_<name> function further down in this module.
+CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]
+
+
+def process_docs(dataset, category):
+    """Return the rows of *dataset* whose ``subject_category`` equals *category*.
+
+    The selection is delegated to ``dataset.filter``, so the return value is
+    whatever that method produces for the given predicate.
+    """
+
+    def _in_category(row):
+        return row["subject_category"] == category
+
+    return dataset.filter(_in_category)
+
+
+def _make_filters():
+    """Map ``process_<category>`` names to category-bound ``process_docs`` filters."""
+    filters = {}
+    for label in CATEGORIES:
+        # e.g. "Social Sciences" -> "process_social_sciences"
+        slug = label.lower().replace(" ", "_")
+        filters["process_" + slug] = partial(process_docs, category=label)
+    return filters
+
+
+process_functions = _make_filters()
+
+# Publish every generated filter as a module-level name (e.g. ``process_stem``)
+# so it can be looked up as an attribute of this module.
+globals().update(process_functions)
--- a/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml
+# Shared settings for the global_mmlu_full_am_* task files, which pull this
+# file in through their `include:` key.
+dataset_path: CohereForAI/Global-MMLU
+dataset_name: am
+# Scored on the test split; few-shot examples are drawn from the dev split.
+test_split: test
+fewshot_split: dev
+fewshot_config:
+  # NOTE(review): presumably takes the leading dev rows as few-shot examples -
+  # confirm against the harness sampler documentation.
+  sampler: first_n
+output_type: multiple_choice
+# Prompt layout: the stripped question, options A-D on separate lines, then "Answer:".
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
+doc_to_choice: ["A", "B", "C", "D"]
+# Gold label is read from the `answer` field (presumably one of the letters
+# above - confirm against the dataset card).
+doc_to_target: answer
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml
+# Top-level group for the "am" configuration: lists the four category groups
+# defined by the sibling _global_mmlu_full_am_*.yaml files and aggregates acc.
+group: global_mmlu_full_am
+task:
+  - global_mmlu_full_am_stem
+  - global_mmlu_full_am_other
+  - global_mmlu_full_am_social_sciences
+  - global_mmlu_full_am_humanities
+aggregate_metric_list:
+  - metric: acc
+    # Lower-case boolean, spelled the same way as in the sibling group files.
+    weight_by_size: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml
+# Humanities category group for "am": its only entry is the
+# global_mmlu_full_am_humanities_tasks tag; acc is aggregated across members.
+group: global_mmlu_full_am_humanities
+task:
+  - global_mmlu_full_am_humanities_tasks
+aggregate_metric_list:
+  - metric: acc
+    # NOTE(review): presumably weights each member by its number of documents - confirm.
+    weight_by_size: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml
+# "Other" category group for "am": its only entry is the
+# global_mmlu_full_am_other_tasks tag; acc is aggregated across members.
+group: global_mmlu_full_am_other
+task:
+  - global_mmlu_full_am_other_tasks
+aggregate_metric_list:
+  - metric: acc
+    # NOTE(review): presumably weights each member by its number of documents - confirm.
+    weight_by_size: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml
+# Social-sciences category group for "am": its only entry is the
+# global_mmlu_full_am_social_sciences_tasks tag; acc is aggregated across members.
+group: global_mmlu_full_am_social_sciences
+task:
+  - global_mmlu_full_am_social_sciences_tasks
+aggregate_metric_list:
+  - metric: acc
+    # NOTE(review): presumably weights each member by its number of documents - confirm.
+    weight_by_size: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml
+# STEM category group for "am": its only entry is the
+# global_mmlu_full_am_stem_tasks tag; acc is aggregated across members.
+group: global_mmlu_full_am_stem
+task:
+  - global_mmlu_full_am_stem_tasks
+aggregate_metric_list:
+  - metric: acc
+    # NOTE(review): presumably weights each member by its number of documents - confirm.
+    weight_by_size: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml
+# Generated by _generate_configs.py
+# Shared settings come from _am_template_yaml. The tag below is the one listed
+# under `task:` in _global_mmlu_full_am_stem.yaml.
+# NOTE(review): utils.process_abstract_algebra is defined outside this view;
+# presumably it narrows the dataset to the abstract_algebra subject - confirm.
+include: _am_template_yaml
+process_docs: !function utils.process_abstract_algebra
+tag: global_mmlu_full_am_stem_tasks
+task: global_mmlu_full_am_abstract_algebra
--- a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml
+# Generated by _generate_configs.py
+# Shared settings come from _am_template_yaml. The tag below is the one listed
+# under `task:` in _global_mmlu_full_am_stem.yaml.
+# NOTE(review): utils.process_anatomy is defined outside this view;
+# presumably it narrows the dataset to the anatomy subject - confirm.
+include: _am_template_yaml
+process_docs: !function utils.process_anatomy
+tag: global_mmlu_full_am_stem_tasks
+task: global_mmlu_full_am_anatomy
--- a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml
+# Generated by _generate_configs.py
+# Shared settings come from _am_template_yaml. The tag below is the one listed
+# under `task:` in _global_mmlu_full_am_stem.yaml.
+# NOTE(review): utils.process_astronomy is defined outside this view;
+# presumably it narrows the dataset to the astronomy subject - confirm.
+include: _am_template_yaml
+process_docs: !function utils.process_astronomy
+tag: global_mmlu_full_am_stem_tasks
+task: global_mmlu_full_am_astronomy
--- a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml
+# Generated by _generate_configs.py
+# Shared settings come from _am_template_yaml. The tag below is the one listed
+# under `task:` in _global_mmlu_full_am_other.yaml.
+# NOTE(review): utils.process_business_ethics is defined outside this view;
+# presumably it narrows the dataset to the business_ethics subject - confirm.
+include: _am_template_yaml
+process_docs: !function utils.process_business_ethics
+tag: global_mmlu_full_am_other_tasks
+task: global_mmlu_full_am_business_ethics
--- a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml
+# Generated by _generate_configs.py
+# Shared settings come from _am_template_yaml. The tag below is the one listed
+# under `task:` in _global_mmlu_full_am_other.yaml.
+# NOTE(review): utils.process_clinical_knowledge is defined outside this view;
+# presumably it narrows the dataset to the clinical_knowledge subject - confirm.
+include: _am_template_yaml
+process_docs: !function utils.process_clinical_knowledge
+tag: global_mmlu_full_am_other_tasks
+task: global_mmlu_full_am_clinical_knowledge
--- a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml
+++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml
+# Generated by _generate_configs.py
+# Shared settings come from _am_template_yaml. The tag below is the one listed
+# under `task:` in _global_mmlu_full_am_stem.yaml.
+# NOTE(review): utils.process_college_biology is defined outside this view;
+# presumably it narrows the dataset to the college_biology subject - confirm.
+include: _am_template_yaml
+process_docs: !function utils.process_college_biology
+tag: global_mmlu_full_am_stem_tasks
+task: global_mmlu_full_am_college_biology