Add INCLUDE tasks (#2769)

* Add INCLUDE tasks * pacify pre-commit --------- Co-authored-by: Baber <baber@hey.com>

Add INCLUDE tasks (#2769)
* Add INCLUDE tasks * pacify pre-commit --------- Co-authored-by: Baber <baber@hey.com>
6fbebb4b · Angelika Romanou · GitHub · bb4fa95e · 6fbebb4b · 6fbebb4b
Unverified Commit 6fbebb4b authored Mar 17, 2025 by Angelika Romanou Committed by GitHub Mar 18, 2025
20 changed files
--- a/lm_eval/tasks/include/few_shot_en/Japanese/utils.py
+++ b/lm_eval/tasks/include/few_shot_en/Japanese/utils.py
+from functools import partial
+
+
+CATEGORIES = [  # domain labels; one filter function is generated per entry further down in this file
+    "Applied Science",
+    "Arts & Humanities",
+    "Business & Commerce",
+    "Driving License",
+    "General knowledge",
+    "Health oriented education",
+    "Marine License",
+    "Medical License",
+    "Professional certification",
+    "STEM",
+    "Social Science",
+]  # entries are compared with == against each row's "domain" value, so spelling and case matter
+
+
+def process_docs(dataset, category):  # return only the rows of `dataset` whose "domain" field equals `category`
+    return dataset.filter(lambda x: x["domain"] == category)  # assumes `dataset` offers .filter(predicate) (presumably a Hugging Face Dataset - confirm)
+
+
+process_functions = {  # generated name (e.g. "process_arts_humanities") -> process_docs bound to one category
+    f"process_{category.lower().replace(' & ', '_').replace(' ', '_')}": partial(  # lower-cased; " & " and remaining spaces become "_"
+        process_docs, category=category
+    )
+    for category in CATEGORIES
+}
+
+globals().update(process_functions)  # publish every generated function as a module-level name
--- a/lm_eval/tasks/include/few_shot_en/Kazakh/_include_base_44_kazakh.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Kazakh/_include_base_44_kazakh.yaml
+group: include_base_44_kazakh  # NOTE(review): lacks the "few_shot_en" infix its member task carries - confirm it cannot collide with a same-named group elsewhere
+task:  # member tasks aggregated under this group
+- include_base_44_kazakh_few_shot_en_arts_humanities
+aggregate_metric_list:
+- metric: acc
+  weight_by_size: true  # presumably weights each member's acc by its number of documents - confirm against the harness docs
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Kazakh/_kazakh_few_shot_en_template_yaml
+++ b/lm_eval/tasks/include/few_shot_en/Kazakh/_kazakh_few_shot_en_template_yaml
+dataset_path: CohereForAI/include-base-44  # dataset identifier (presumably a Hugging Face Hub repo - confirm)
+dataset_name: Kazakh  # per-language dataset configuration
+test_split: test  # NOTE(review): no few-shot split or few-shot config is declared in this template despite the few_shot_en name - confirm where shots come from
+output_type: multiple_choice
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\n
+  D. {{option_d}}\nAnswer:"  # NOTE(review): YAML folds the line break inside this double-quoted scalar into one space, so option D renders as " D." - confirm intended
+doc_to_choice:  # answer labels scored for each question
+  - A
+  - B
+  - C
+  - D
+doc_to_target: answer  # dataset field holding the gold answer (presumably an index into doc_to_choice - confirm)
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Kazakh/include_base_44_kazakh_arts_humanities.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Kazakh/include_base_44_kazakh_arts_humanities.yaml
+include: _kazakh_few_shot_en_template_yaml  # shared Kazakh template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Arts
+  & Humanities.
+process_docs: !function 'utils.process_arts_humanities'  # name generated in this directory's utils.py for the "Arts & Humanities" category
+task: include_base_44_kazakh_few_shot_en_arts_humanities  # must match the entry in the Kazakh group file
--- a/lm_eval/tasks/include/few_shot_en/Kazakh/utils.py
+++ b/lm_eval/tasks/include/few_shot_en/Kazakh/utils.py
+from functools import partial
+
+
+CATEGORIES = [  # domain labels; one filter function is generated per entry further down in this file
+    "Applied Science",
+    "Arts & Humanities",
+    "Business & Commerce",
+    "Driving License",
+    "General knowledge",
+    "Health oriented education",
+    "Marine License",
+    "Medical License",
+    "Professional certification",
+    "STEM",
+    "Social Science",
+]  # entries are compared with == against each row's "domain" value, so spelling and case matter
+
+
+def process_docs(dataset, category):  # return only the rows of `dataset` whose "domain" field equals `category`
+    return dataset.filter(lambda x: x["domain"] == category)  # assumes `dataset` offers .filter(predicate) (presumably a Hugging Face Dataset - confirm)
+
+
+process_functions = {  # generated name (e.g. "process_arts_humanities") -> process_docs bound to one category
+    f"process_{category.lower().replace(' & ', '_').replace(' ', '_')}": partial(  # lower-cased; " & " and remaining spaces become "_"
+        process_docs, category=category
+    )
+    for category in CATEGORIES
+}
+
+globals().update(process_functions)  # publish each function as a module-level name so the task YAMLs here can reference utils.<name>
--- a/lm_eval/tasks/include/few_shot_en/Korean/_include_base_44_korean.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Korean/_include_base_44_korean.yaml
+group: include_base_44_korean  # NOTE(review): lacks the "few_shot_en" infix its member tasks carry - confirm it cannot collide with a same-named group elsewhere
+task:  # member tasks aggregated under this group
+- include_base_44_korean_few_shot_en_professional_certification
+- include_base_44_korean_few_shot_en_social_science
+aggregate_metric_list:
+- metric: acc
+  weight_by_size: true  # presumably weights each member's acc by its number of documents - confirm against the harness docs
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Korean/_korean_few_shot_en_template_yaml
+++ b/lm_eval/tasks/include/few_shot_en/Korean/_korean_few_shot_en_template_yaml
+dataset_path: CohereForAI/include-base-44  # dataset identifier (presumably a Hugging Face Hub repo - confirm)
+dataset_name: Korean  # per-language dataset configuration
+test_split: test  # NOTE(review): no few-shot split or few-shot config is declared in this template despite the few_shot_en name - confirm where shots come from
+output_type: multiple_choice
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\n
+  D. {{option_d}}\nAnswer:"  # NOTE(review): YAML folds the line break inside this double-quoted scalar into one space, so option D renders as " D." - confirm intended
+doc_to_choice:  # answer labels scored for each question
+  - A
+  - B
+  - C
+  - D
+doc_to_target: answer  # dataset field holding the gold answer (presumably an index into doc_to_choice - confirm)
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Korean/include_base_44_korean_professional_certification.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Korean/include_base_44_korean_professional_certification.yaml
+include: _korean_few_shot_en_template_yaml  # shared Korean template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Professional
+  certification.
+process_docs: !function 'utils.process_professional_certification'  # name generated in this directory's utils.py for the "Professional certification" category
+task: include_base_44_korean_few_shot_en_professional_certification  # must match the entry in the Korean group file
--- a/lm_eval/tasks/include/few_shot_en/Korean/include_base_44_korean_social_science.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Korean/include_base_44_korean_social_science.yaml
+include: _korean_few_shot_en_template_yaml  # shared Korean template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Social
+  Science.
+process_docs: !function 'utils.process_social_science'  # name generated in this directory's utils.py for the "Social Science" category
+task: include_base_44_korean_few_shot_en_social_science  # must match the entry in the Korean group file
--- a/lm_eval/tasks/include/few_shot_en/Korean/utils.py
+++ b/lm_eval/tasks/include/few_shot_en/Korean/utils.py
+from functools import partial
+
+
+CATEGORIES = [  # domain labels; one filter function is generated per entry further down in this file
+    "Applied Science",
+    "Arts & Humanities",
+    "Business & Commerce",
+    "Driving License",
+    "General knowledge",
+    "Health oriented education",
+    "Marine License",
+    "Medical License",
+    "Professional certification",
+    "STEM",
+    "Social Science",
+]  # entries are compared with == against each row's "domain" value, so spelling and case matter
+
+
+def process_docs(dataset, category):  # return only the rows of `dataset` whose "domain" field equals `category`
+    return dataset.filter(lambda x: x["domain"] == category)  # assumes `dataset` offers .filter(predicate) (presumably a Hugging Face Dataset - confirm)
+
+
+process_functions = {  # generated name (e.g. "process_social_science") -> process_docs bound to one category
+    f"process_{category.lower().replace(' & ', '_').replace(' ', '_')}": partial(  # lower-cased; " & " and remaining spaces become "_"
+        process_docs, category=category
+    )
+    for category in CATEGORIES
+}
+
+globals().update(process_functions)  # publish each function as a module-level name so the task YAMLs here can reference utils.<name>
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/_include_base_44_lithuanian.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/_include_base_44_lithuanian.yaml
+group: include_base_44_lithuanian  # NOTE(review): lacks the "few_shot_en" infix its member tasks carry - confirm it cannot collide with a same-named group elsewhere
+task:  # member tasks aggregated under this group
+- include_base_44_lithuanian_few_shot_en_arts_humanities
+- include_base_44_lithuanian_few_shot_en_stem
+- include_base_44_lithuanian_few_shot_en_social_science
+- include_base_44_lithuanian_few_shot_en_business_commerce
+- include_base_44_lithuanian_few_shot_en_professional_certification
+aggregate_metric_list:
+- metric: acc
+  weight_by_size: true  # presumably weights each member's acc by its number of documents - confirm against the harness docs
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/_lithuanian_few_shot_en_template_yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/_lithuanian_few_shot_en_template_yaml
+dataset_path: CohereForAI/include-base-44  # dataset identifier (presumably a Hugging Face Hub repo - confirm)
+dataset_name: Lithuanian  # per-language dataset configuration
+test_split: test  # NOTE(review): no few-shot split or few-shot config is declared in this template despite the few_shot_en name - confirm where shots come from
+output_type: multiple_choice
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\n
+  D. {{option_d}}\nAnswer:"  # NOTE(review): YAML folds the line break inside this double-quoted scalar into one space, so option D renders as " D." - confirm intended
+doc_to_choice:  # answer labels scored for each question
+  - A
+  - B
+  - C
+  - D
+doc_to_target: answer  # dataset field holding the gold answer (presumably an index into doc_to_choice - confirm)
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_arts_humanities.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_arts_humanities.yaml
+include: _lithuanian_few_shot_en_template_yaml  # shared Lithuanian template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Arts
+  & Humanities.
+process_docs: !function 'utils.process_arts_humanities'  # name generated in this directory's utils.py for the "Arts & Humanities" category
+task: include_base_44_lithuanian_few_shot_en_arts_humanities  # must match the entry in the Lithuanian group file
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_business_commerce.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_business_commerce.yaml
+include: _lithuanian_few_shot_en_template_yaml  # shared Lithuanian template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Business
+  & Commerce.
+process_docs: !function 'utils.process_business_commerce'  # name generated in this directory's utils.py for the "Business & Commerce" category
+task: include_base_44_lithuanian_few_shot_en_business_commerce  # must match the entry in the Lithuanian group file
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_professional_certification.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_professional_certification.yaml
+include: _lithuanian_few_shot_en_template_yaml  # shared Lithuanian template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Professional
+  certification.
+process_docs: !function 'utils.process_professional_certification'  # name generated in this directory's utils.py for the "Professional certification" category
+task: include_base_44_lithuanian_few_shot_en_professional_certification  # must match the entry in the Lithuanian group file
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_social_science.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_social_science.yaml
+include: _lithuanian_few_shot_en_template_yaml  # shared Lithuanian template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about Social
+  Science.
+process_docs: !function 'utils.process_social_science'  # name generated in this directory's utils.py for the "Social Science" category
+task: include_base_44_lithuanian_few_shot_en_social_science  # must match the entry in the Lithuanian group file
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_stem.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/include_base_44_lithuanian_stem.yaml
+include: _lithuanian_few_shot_en_template_yaml  # shared Lithuanian template supplying dataset, prompt and metric settings
+description: The following are multiple-choice questions (with answers) about STEM.
+process_docs: !function 'utils.process_stem'  # name generated in this directory's utils.py for the "STEM" category
+task: include_base_44_lithuanian_few_shot_en_stem  # must match the entry in the Lithuanian group file
--- a/lm_eval/tasks/include/few_shot_en/Lithuanian/utils.py
+++ b/lm_eval/tasks/include/few_shot_en/Lithuanian/utils.py
+from functools import partial
+
+
+CATEGORIES = [  # domain labels; one filter function is generated per entry further down in this file
+    "Applied Science",
+    "Arts & Humanities",
+    "Business & Commerce",
+    "Driving License",
+    "General knowledge",
+    "Health oriented education",
+    "Marine License",
+    "Medical License",
+    "Professional certification",
+    "STEM",
+    "Social Science",
+]  # entries are compared with == against each row's "domain" value, so spelling and case matter
+
+
+def process_docs(dataset, category):  # return only the rows of `dataset` whose "domain" field equals `category`
+    return dataset.filter(lambda x: x["domain"] == category)  # assumes `dataset` offers .filter(predicate) (presumably a Hugging Face Dataset - confirm)
+
+
+process_functions = {  # generated name (e.g. "process_business_commerce") -> process_docs bound to one category
+    f"process_{category.lower().replace(' & ', '_').replace(' ', '_')}": partial(  # lower-cased; " & " and remaining spaces become "_"
+        process_docs, category=category
+    )
+    for category in CATEGORIES
+}
+
+globals().update(process_functions)  # publish each function as a module-level name so the task YAMLs here can reference utils.<name>
--- a/lm_eval/tasks/include/few_shot_en/Malay/_include_base_44_malay.yaml
+++ b/lm_eval/tasks/include/few_shot_en/Malay/_include_base_44_malay.yaml
+group: include_base_44_malay  # NOTE(review): lacks the "few_shot_en" infix its member tasks carry - confirm it cannot collide with a same-named group elsewhere
+task:  # member tasks aggregated under this group
+- include_base_44_malay_few_shot_en_social_science
+- include_base_44_malay_few_shot_en_business_commerce
+- include_base_44_malay_few_shot_en_arts_humanities
+aggregate_metric_list:
+- metric: acc
+  weight_by_size: true  # presumably weights each member's acc by its number of documents - confirm against the harness docs
+metadata:
+  version: 0.0
--- a/lm_eval/tasks/include/few_shot_en/Malay/_malay_few_shot_en_template_yaml
+++ b/lm_eval/tasks/include/few_shot_en/Malay/_malay_few_shot_en_template_yaml
+dataset_path: CohereForAI/include-base-44  # dataset identifier (presumably a Hugging Face Hub repo - confirm)
+dataset_name: Malay  # per-language dataset configuration
+test_split: test  # NOTE(review): no few-shot split or few-shot config is declared in this template despite the few_shot_en name - confirm where shots come from
+output_type: multiple_choice
+doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\n
+  D. {{option_d}}\nAnswer:"  # NOTE(review): YAML folds the line break inside this double-quoted scalar into one space, so option D renders as " D." - confirm intended
+doc_to_choice:  # answer labels scored for each question
+  - A
+  - B
+  - C
+  - D
+doc_to_target: answer  # dataset field holding the gold answer (presumably an index into doc_to_choice - confirm)
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 0.0