"vscode:/vscode.git/clone" did not exist on "27b0820ddd1b2a401ddc1dae06abf5515879cec3"
Unverified Commit ebbbb968 authored by Hadi Abdine's avatar Hadi Abdine Committed by GitHub
Browse files

Add Darija (Moroccan dialect) tasks, including darijammlu, darijahellaswag and darija_bench (#2521)



* add Darija tasks

* fix multiple groups issue in darijammlu

* add MT to the description of the Darija tasks

* Update README.md

nit

* fix the recursion error caused by the darija_summarization task

* use a custom filter instead of the decorator for the strip function

---------
Co-authored-by: Baber Abbasi <92168766+baberabb@users.noreply.github.com>
parent 5a9d5ba0
# Group config: aggregates every task tagged `darijammlu_ar_mmlu_tasks`
# under the display name "ArabicMMLU".
group: darijammlu_ar_mmlu
group_alias: ArabicMMLU
task:
  - darijammlu_ar_mmlu_tasks
aggregate_metric_list:
  # `weight_by_size` belongs to the `acc` entry, so it must be nested in
  # the same list item rather than sitting at the top level.
  - metric: acc
    weight_by_size: True
metadata:
  version: 0
# Group config: aggregates every task tagged `darijammlu_mmlu_tasks`
# under the display name "MMLU".
group: darijammlu_mmlu
group_alias: MMLU
task:
  - darijammlu_mmlu_tasks
aggregate_metric_list:
  # `weight_by_size` belongs to the `acc` entry, so it must be nested in
  # the same list item rather than sitting at the top level.
  - metric: acc
    weight_by_size: True
metadata:
  version: 0
# Shared template that every per-subject DarijaMMLU task YAML pulls in via
# `include`. Per-subject files only add `dataset_name`, `task`, `task_alias`
# and `tag` on top of these settings.
dataset_path: MBZUAI-Paris/DarijaMMLU
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: multiple_choice
# Prompt and choice construction are delegated to functions named in `utils`.
doc_to_text: !function utils.doc_to_text
doc_to_choice: !function utils.doc_to_choice
doc_to_target: answer
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
dataset_kwargs:
  trust_remote_code: true
"""
Take in a base YAML template and write one task YAML per DarijaMMLU subject,
each of which includes that template, plus the top-level benchmark group YAML.
"""
import argparse
import logging
import os
import yaml
from tqdm import tqdm
# Module-level logger obtained under the name "lm-eval"; the script uses it
# to report each file it writes.
eval_logger = logging.getLogger("lm-eval")
# Maps each MMLU-derived subject (used as the dataset config name) to its
# category. Insertion order is the order in which files are generated.
MMLU_SUBJECTS = {
    "global_facts": "other",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "management": "other",
    "marketing": "other",
    "moral_disputes": "humanities",
    "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "professional_law": "humanities",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "world_religions": "humanities",
}
# Maps each ArabicMMLU-derived subject (used as the dataset config name) to
# its category. Insertion order is the order in which files are generated.
ARABIC_MMLU_SUBJECTS = {
    "islamic_studies": "humanities",
    "driving_test": "other",
    "natural_science": "stem",
    "history": "humanities",
    "general_knowledge": "other",
    "law": "humanities",
    "physics": "stem",
    "social_science": "social_sciences",
    "management_ar": "other",
    "arabic_language": "language",
    "political_science": "social_sciences",
    "philosophy_ar": "humanities",
    "accounting": "social_sciences",
    "computer_science": "stem",
    "geography": "social_sciences",
    "math": "stem",
    "biology": "stem",
    "economics": "social_sciences",
    "arabic_language_(general)": "language",
    "arabic_language_(grammar)": "language",
    "civics": "social_sciences",
}
# Source benchmark key -> its subject table. The key is spliced into the
# tag name written to every generated task YAML.
DATASETS = {
    "mmlu": MMLU_SUBJECTS,
    "ar_mmlu": ARABIC_MMLU_SUBJECTS,
}
def parse_args(argv=None):
    """Parse the command-line options of the config generator.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so calling
            ``parse_args()`` behaves as before.

    Returns:
        An ``argparse.Namespace`` with ``base_yaml_path`` and
        ``save_prefix_path`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Generate per-subject DarijaMMLU task YAML files."
    )
    parser.add_argument(
        "--base_yaml_path",
        default="_default_darijammlu_template_yaml",
        help="Path of the base YAML template that generated files include.",
    )
    parser.add_argument(
        "--save_prefix_path",
        default="darijammlu",
        help="Path prefix for the generated YAML files.",
    )
    return parser.parse_args(argv)
def main():
    """Write one task YAML per subject plus the top-level group YAML.

    For every subject listed in ``DATASETS`` a file named
    ``<save_prefix_path>_<subject>.yaml`` is written; it includes the base
    template and sets the task name, alias, dataset name and tag. Finally
    ``<save_prefix_path>.yaml`` is written with the ``darijammlu`` group.

    Raises:
        FileNotFoundError: if ``--base_yaml_path`` does not point to a file.
    """
    args = parse_args()

    # Only the template's file name is needed for the `include` key. The
    # template is deliberately not parsed: its content was never used, and it
    # contains `!function` tags that a stock PyYAML loader is expected to
    # reject. The existence check keeps the failure mode for a wrong path.
    if not os.path.isfile(args.base_yaml_path):
        raise FileNotFoundError(
            f"Base YAML template not found: {args.base_yaml_path}"
        )
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]

    for dataset, subjects in DATASETS.items():
        # Each task carries a single tag, the one its dataset-level group
        # (`darijammlu_<dataset>`) aggregates over. This matches the committed
        # per-subject YAML files, which list only this tag.
        dataset_tag = f"darijammlu_{dataset}_tasks"
        for subject in tqdm(subjects):
            yaml_dict = {
                "include": base_yaml_name,
                "tag": [dataset_tag],
                "task": f"darijammlu_{subject}",
                "task_alias": subject.replace("_", " "),
                "dataset_name": subject,
            }

            file_save_path = f"{args.save_prefix_path}_{subject}.yaml"
            eval_logger.info(
                "Saving yaml for subset %s to %s", subject, file_save_path
            )
            with open(file_save_path, "w", encoding="utf-8") as yaml_file:
                yaml.dump(
                    yaml_dict,
                    yaml_file,
                    allow_unicode=True,
                    default_style='"',
                )

    # The top-level group references the per-dataset groups
    # (`darijammlu_mmlu`, `darijammlu_ar_mmlu`), not per-category names.
    darijammlu_groups = [f"darijammlu_{dataset}" for dataset in DATASETS]

    file_save_path = args.save_prefix_path + ".yaml"
    eval_logger.info("Saving benchmark config to %s", file_save_path)
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            {
                "group": "darijammlu",
                "task": darijammlu_groups,
            },
            yaml_file,
            indent=4,
            default_flow_style=False,
        )


if __name__ == "__main__":
    main()
\ No newline at end of file
"dataset_name": "accounting"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_accounting"
"task_alias": "accounting"
"dataset_name": "arabic_language"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_arabic_language"
"task_alias": "arabic language"
"dataset_name": "arabic_language_(general)"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_arabic_language_(general)"
"task_alias": "arabic language (general)"
"dataset_name": "arabic_language_(grammar)"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_arabic_language_(grammar)"
"task_alias": "arabic language (grammar)"
"dataset_name": "biology"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_biology"
"task_alias": "biology"
"dataset_name": "civics"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_civics"
"task_alias": "civics"
"dataset_name": "computer_science"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_computer_science"
"task_alias": "computer science"
"dataset_name": "driving_test"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_driving_test"
"task_alias": "driving test"
"dataset_name": "economics"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_economics"
"task_alias": "economics"
"dataset_name": "general_knowledge"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_general_knowledge"
"task_alias": "general knowledge"
"dataset_name": "geography"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_ar_mmlu_tasks"
"task": "darijammlu_geography"
"task_alias": "geography"
"dataset_name": "global_facts"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_global_facts"
"task_alias": "global facts"
"dataset_name": "high_school_european_history"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_high_school_european_history"
"task_alias": "high school european history"
"dataset_name": "high_school_geography"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_high_school_geography"
"task_alias": "high school geography"
"dataset_name": "high_school_government_and_politics"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_high_school_government_and_politics"
"task_alias": "high school government and politics"
"dataset_name": "high_school_psychology"
"include": "_default_darijammlu_template_yaml"
"tag":
- "darijammlu_mmlu_tasks"
"task": "darijammlu_high_school_psychology"
"task_alias": "high school psychology"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment