separate category for `global_mmlu` (#2652)

* separate category * set version 0.0 * apply precommit

separate category for `global_mmlu` (#2652)
* separate category * set version 0.0 * apply precommit
5c006ed4 · Minho Ryu · GitHub · 370e2f9e · 370e2f9e · 5c006ed4
Unverified Commit 5c006ed4 authored Jan 25, 2025 by Minho Ryu Committed by GitHub Jan 24, 2025
20 changed files
--- a/lm_eval/tasks/global_mmlu/default/_generate_configs.py
+++ b/lm_eval/tasks/global_mmlu/default/_generate_configs.py
-import yaml
-
-
-languages = [
-    "en",
-    "ar",
-    "fr",
-    "es",
-    "hi",
-    "de",
-    "id",
-    "it",
-    "ja",
-    "ko",
-    "pt",
-    "zh",
-    "yo",
-    "bn",
-    "sw",
-]
-
-
-def main() -> None:
-    for language in languages:
-        file_name = f"global_mmlu_{language}.yaml"
-        try:
-            with open(f"{file_name}", "w") as f:
-                f.write("# Generated by _generate_configs.py\n")
-                yaml.dump(
-                    {
-                        "include": "_default_yaml",
-                        "task": f"global_mmlu_{language}",
-                        "dataset_name": language,
-                    },
-                    f,
-                )
-        except FileExistsError:
-            pass
-
-
-if __name__ == "__main__":
-    main()
--- a/lm_eval/tasks/global_mmlu/default/_default_yaml
+++ b/lm_eval/tasks/global_mmlu/default/_default_yaml
-tag:
-  - global_mmlu
 dataset_path: CohereForAI/Global-MMLU-Lite
+dataset_name: ar
 test_split: test
 fewshot_split: dev
 fewshot_config:

--- a/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml
+# Group config: bundles the six per-category `ar` subtasks listed below under
+# the single group name `global_mmlu_ar`.
+group: global_mmlu_ar
+task:
+  - global_mmlu_ar_business
+  - global_mmlu_ar_humanities
+  - global_mmlu_ar_medical
+  - global_mmlu_ar_other
+  - global_mmlu_ar_stem
+  - global_mmlu_ar_social_sciences
+# Report a single aggregated `acc` for the group.
+# NOTE(review): `weight_by_size: True` presumably weights each subtask by its
+# number of documents instead of averaging the six subtasks equally -- confirm
+# against the lm-eval group-config documentation.
+aggregate_metric_list:
+  - metric: acc
+    weight_by_size: True
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml
+# Generated by _generate_configs.py
+include: _ar_template_yaml
+process_docs: !function utils.process_business
+task: global_mmlu_ar_business
--- a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml
+# Generated by _generate_configs.py
+include: _ar_template_yaml
+process_docs: !function utils.process_humanities
+task: global_mmlu_ar_humanities
--- a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml
+# Generated by _generate_configs.py
+include: _ar_template_yaml
+process_docs: !function utils.process_medical
+task: global_mmlu_ar_medical
--- a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml
+# Generated by _generate_configs.py
+include: _ar_template_yaml
+process_docs: !function utils.process_other
+task: global_mmlu_ar_other
--- a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml
+# Generated by _generate_configs.py
+include: _ar_template_yaml
+process_docs: !function utils.process_social_sciences
+task: global_mmlu_ar_social_sciences
--- a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml
+++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml
+# Generated by _generate_configs.py
+include: _ar_template_yaml
+process_docs: !function utils.process_stem
+task: global_mmlu_ar_stem
--- a/lm_eval/tasks/global_mmlu/default/ar/utils.py
+++ b/lm_eval/tasks/global_mmlu/default/ar/utils.py
+from functools import partial
+
+
+# Values of the "subject_category" field that get their own subtask.
+CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]
+
+
+# Keep only the rows of `dataset` whose "subject_category" equals `category`.
+# `dataset` must expose `.filter(callable)`; presumably a Hugging Face
+# `datasets.Dataset` -- TODO confirm against the caller.
+def process_docs(dataset, category):
+    return dataset.filter(lambda x: x["subject_category"] == category)
+
+
+# One pre-bound filter per category, keyed by the lower-cased category name with
+# spaces replaced by underscores: process_business, process_humanities,
+# process_medical, process_other, process_stem, process_social_sciences.
+process_functions = {
+    f"process_{category.lower().replace(' ', '_')}": partial(
+        process_docs, category=category
+    )
+    for category in CATEGORIES
+}
+
+# Publish the generated callables as module-level names so that the task YAMLs'
+# `!function utils.process_<category>` references can resolve them.
+globals().update(process_functions)
--- a/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml
+# Shared template for the `bn` subtasks; each global_mmlu_bn_<category>.yaml
+# includes this file and adds its own `task` name and `process_docs` filter.
+dataset_path: CohereForAI/Global-MMLU-Lite
+dataset_name: bn
+# Evaluate on the `test` split; draw few-shot examples from the `dev` split.
+test_split: test
+fewshot_split: dev
+fewshot_config:
+  sampler: default
+output_type: multiple_choice
+# Prompt layout: the stripped question, the four options labelled A-D on their
+# own lines, then a trailing "Answer:" cue.
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
+doc_to_choice: ["A", "B", "C", "D"]
+# The gold label is read from the document's `answer` field.
+doc_to_target: answer
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml
+# Group config: bundles the six per-category `bn` subtasks listed below under
+# the single group name `global_mmlu_bn`.
+group: global_mmlu_bn
+task:
+  - global_mmlu_bn_business
+  - global_mmlu_bn_humanities
+  - global_mmlu_bn_medical
+  - global_mmlu_bn_other
+  - global_mmlu_bn_stem
+  - global_mmlu_bn_social_sciences
+# Report a single aggregated `acc` for the group.
+# NOTE(review): `weight_by_size: True` presumably weights each subtask by its
+# number of documents instead of averaging the six subtasks equally -- confirm
+# against the lm-eval group-config documentation.
+aggregate_metric_list:
+  - metric: acc
+    weight_by_size: True
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml
+# Generated by _generate_configs.py
+include: _bn_template_yaml
+process_docs: !function utils.process_business
+task: global_mmlu_bn_business
--- a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml
+# Generated by _generate_configs.py
+include: _bn_template_yaml
+process_docs: !function utils.process_humanities
+task: global_mmlu_bn_humanities
--- a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml
+# Generated by _generate_configs.py
+include: _bn_template_yaml
+process_docs: !function utils.process_medical
+task: global_mmlu_bn_medical
--- a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml
+# Generated by _generate_configs.py
+include: _bn_template_yaml
+process_docs: !function utils.process_other
+task: global_mmlu_bn_other
--- a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml
+# Generated by _generate_configs.py
+include: _bn_template_yaml
+process_docs: !function utils.process_social_sciences
+task: global_mmlu_bn_social_sciences
--- a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml
+++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml
+# Generated by _generate_configs.py
+include: _bn_template_yaml
+process_docs: !function utils.process_stem
+task: global_mmlu_bn_stem
--- a/lm_eval/tasks/global_mmlu/default/bn/utils.py
+++ b/lm_eval/tasks/global_mmlu/default/bn/utils.py
+from functools import partial
+
+
+# Values of the "subject_category" field that get their own subtask.
+CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"]
+
+
+# Keep only the rows of `dataset` whose "subject_category" equals `category`.
+# `dataset` must expose `.filter(callable)`; presumably a Hugging Face
+# `datasets.Dataset` -- TODO confirm against the caller.
+def process_docs(dataset, category):
+    return dataset.filter(lambda x: x["subject_category"] == category)
+
+
+# One pre-bound filter per category, keyed by the lower-cased category name with
+# spaces replaced by underscores: process_business, process_humanities,
+# process_medical, process_other, process_stem, process_social_sciences.
+process_functions = {
+    f"process_{category.lower().replace(' ', '_')}": partial(
+        process_docs, category=category
+    )
+    for category in CATEGORIES
+}
+
+# Publish the generated callables as module-level names so that the task YAMLs'
+# `!function utils.process_<category>` references can resolve them.
+globals().update(process_functions)
--- a/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml
+++ b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml
+dataset_path: CohereForAI/Global-MMLU-Lite
+dataset_name: de
+test_split: test
+fewshot_split: dev
+fewshot_config:
+  sampler: default
+output_type: multiple_choice
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
+doc_to_choice: ["A", "B", "C", "D"]
+doc_to_target: answer
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0