Unverified Commit 6dac8c69 authored by Boda Sadallah's avatar Boda Sadallah Committed by GitHub
Browse files

New arabicmmlu (#2541)

* point to the original ArabicMMLU dataset

* create the new subtasks files

* fix bug when the context field is empty
parent a5c344cf
...@@ -9,4 +9,4 @@ aggregate_metric_list: ...@@ -9,4 +9,4 @@ aggregate_metric_list:
- metric: acc - metric: acc
weight_by_size: True weight_by_size: True
metadata: metadata:
version: 0 version: 1
...@@ -6,4 +6,4 @@ aggregate_metric_list: ...@@ -6,4 +6,4 @@ aggregate_metric_list:
- metric: acc - metric: acc
weight_by_size: True weight_by_size: True
metadata: metadata:
version: 0 version: 1
...@@ -6,4 +6,4 @@ aggregate_metric_list: ...@@ -6,4 +6,4 @@ aggregate_metric_list:
- metric: acc - metric: acc
weight_by_size: True weight_by_size: True
metadata: metadata:
version: 0 version: 1
...@@ -6,4 +6,4 @@ aggregate_metric_list: ...@@ -6,4 +6,4 @@ aggregate_metric_list:
- metric: acc - metric: acc
weight_by_size: True weight_by_size: True
metadata: metadata:
version: 0 version: 1
...@@ -6,4 +6,4 @@ aggregate_metric_list: ...@@ -6,4 +6,4 @@ aggregate_metric_list:
- metric: acc - metric: acc
weight_by_size: True weight_by_size: True
metadata: metadata:
version: 0 version: 1
...@@ -6,4 +6,4 @@ aggregate_metric_list: ...@@ -6,4 +6,4 @@ aggregate_metric_list:
- metric: acc - metric: acc
weight_by_size: True weight_by_size: True
metadata: metadata:
version: 0 version: 1
dataset_path: yazeed7/ArabicMMLU dataset_path: MBZUAI/ArabicMMLU
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
fewshot_config: fewshot_config:
...@@ -12,4 +12,4 @@ metric_list: ...@@ -12,4 +12,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 0.0 version: 1.0
...@@ -13,48 +13,46 @@ from tqdm import tqdm ...@@ -13,48 +13,46 @@ from tqdm import tqdm
eval_logger = logging.getLogger("lm-eval") eval_logger = logging.getLogger("lm-eval")
SUBJECTS = { SUBJECTS = {'Islamic Studies': 'humanities',
"Driving Test": "other", 'Driving Test': 'other',
"High Geography": "social_science", 'Natural Science (Middle School)': 'stem',
"High History": "humanities", 'Natural Science (Primary School)': 'stem',
"Islamic Studies": "humanities", 'History (Primary School)': 'humanities',
"Univ Accounting": "social_science", 'History (Middle School)': 'humanities',
"Primary General Knowledge": "other", 'History (High School)': 'humanities',
"Univ Political Science": "social_science", 'General Knowledge': 'other',
"Primary Math": "stem", 'General Knowledge (Primary School)': 'other',
"Middle General Knowledge": "other", 'General Knowledge (Middle School)': 'other',
"High Biology": "stem", 'Law (Professional)': 'humanities',
"Primary Natural Science": "stem", 'Physics (High School)': 'stem',
"High Economics": "social_science", 'Social Science (Middle School)': 'social_science',
"Middle Natural Science": "stem", 'Social Science (Primary School)': 'social_science',
"Middle Geography": "social_science", 'Management (University)': 'other',
"Primary Social Science": "social_science", 'Arabic Language (Primary School)': 'language',
"Middle Computer Science": "stem", 'Arabic Language (Middle School)': 'language',
"Middle Islamic Studies": "humanities", 'Arabic Language (High School)': 'language',
"Primary Computer Science": "stem", 'Political Science (University)': 'social_science',
"High Physics": "stem", 'Philosophy (High School)': 'humanities',
"Middle Social Science": "social_science", 'Accounting (University)': 'social_science',
"Middle Civics": "social_science", 'Computer Science (University)': 'stem',
"High Computer Science": "stem", 'Computer Science (Middle School)': 'stem',
"General Knowledge": "other", 'Computer Science (Primary School)': 'stem',
"High Civics": "social_science", 'Computer Science (High School)': 'stem',
"Prof Law": "humanities", 'Geography (Primary School)': 'social_science',
"High Islamic Studies": "humanities", 'Geography (Middle School)': 'social_science',
"Primary Arabic Language": "language", 'Geography (High School)': 'social_science',
"High Arabic Language": "language", 'Math (Primary School)': 'stem',
"Arabic Language (Grammar)": "language", 'Biology (High School)': 'stem',
"Primary History": "humanities", 'Economics (University)': 'social_science',
"Middle History": "humanities", 'Economics (Middle School)': 'social_science',
"Univ Economics": "social_science", 'Economics (High School)': 'social_science',
"Arabic Language (General)": "language", 'Arabic Language (General)': 'language',
"Univ Computer Science": "stem", 'Arabic Language (Grammar)': 'language',
"Primary Islamic Studies": "humanities", 'Islamic Studies (High School)': 'humanities',
"Primary Geography": "social_science", 'Islamic Studies (Middle School)': 'humanities',
"High Philosophy": "humanities", 'Islamic Studies (Primary School)': 'humanities',
"Middle Arabic Language": "language", 'Civics (Middle School)': 'social_science',
"Middle Economics": "social_science", 'Civics (High School)': 'social_science'}
"Univ Management": "other",
}
def parse_args(): def parse_args():
...@@ -69,8 +67,9 @@ if __name__ == "__main__": ...@@ -69,8 +67,9 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our "other" YAMLs. # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1] base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f) # with open(args.base_yaml_path, encoding="utf-8") as f:
# base_yaml = yaml.full_load(f)
ALL_CATEGORIES = [] ALL_CATEGORIES = []
for subject, category in tqdm(SUBJECTS.items()): for subject, category in tqdm(SUBJECTS.items()):
...@@ -81,8 +80,8 @@ if __name__ == "__main__": ...@@ -81,8 +80,8 @@ if __name__ == "__main__":
yaml_dict = { yaml_dict = {
"include": base_yaml_name, "include": base_yaml_name,
"tag": f"arabicmmlu_{category}", "tag": f"arabicmmlu_{category}_tasks",
"task": f"arabicmmlu_{subject.lower().replace(' ', '_')}", "task": f"arabicmmlu_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}",
"task_alias": subject, "task_alias": subject,
"dataset_name": subject, "dataset_name": subject,
# "description": description, # "description": description,
......
"dataset_name": "Middle Civics" "dataset_name": "Accounting (University)"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_middle_civics" "tag": "arabicmmlu_social_science_tasks"
"task_alias": "Middle Civics" "task": "arabicmmlu_accounting_university"
"task_alias": "Accounting (University)"
"dataset_name": "Arabic Language (General)" "dataset_name": "Arabic Language (General)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(general)" "tag": "arabicmmlu_language_tasks"
"task": "arabicmmlu_arabic_language_general"
"task_alias": "Arabic Language (General)" "task_alias": "Arabic Language (General)"
"dataset_name": "Arabic Language (Grammar)" "dataset_name": "Arabic Language (Grammar)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(grammar)" "tag": "arabicmmlu_language_tasks"
"task": "arabicmmlu_arabic_language_grammar"
"task_alias": "Arabic Language (Grammar)" "task_alias": "Arabic Language (Grammar)"
"dataset_name": "High Arabic Language" "dataset_name": "Arabic Language (High School)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_arabic_language" "tag": "arabicmmlu_language_tasks"
"task_alias": "High Arabic Language" "task": "arabicmmlu_arabic_language_high_school"
"task_alias": "Arabic Language (High School)"
"dataset_name": "Middle Arabic Language" "dataset_name": "Arabic Language (Middle School)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_middle_arabic_language" "tag": "arabicmmlu_language_tasks"
"task_alias": "Middle Arabic Language" "task": "arabicmmlu_arabic_language_middle_school"
"task_alias": "Arabic Language (Middle School)"
"dataset_name": "Primary Arabic Language" "dataset_name": "Arabic Language (Primary School)"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_arabic_language" "tag": "arabicmmlu_language_tasks"
"task_alias": "Primary Arabic Language" "task": "arabicmmlu_arabic_language_primary_school"
"task_alias": "Arabic Language (Primary School)"
"dataset_name": "High Physics" "dataset_name": "Biology (High School)"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_physics" "tag": "arabicmmlu_stem_tasks"
"task_alias": "High Physics" "task": "arabicmmlu_biology_high_school"
"task_alias": "Biology (High School)"
"dataset_name": "High Economics" "dataset_name": "Civics (High School)"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_economics" "tag": "arabicmmlu_social_science_tasks"
"task_alias": "High Economics" "task": "arabicmmlu_civics_high_school"
"task_alias": "Civics (High School)"
"dataset_name": "High Geography" "dataset_name": "Civics (Middle School)"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_geography" "tag": "arabicmmlu_social_science_tasks"
"task_alias": "High Geography" "task": "arabicmmlu_civics_middle_school"
"task_alias": "Civics (Middle School)"
"dataset_name": "High Computer Science" "dataset_name": "Computer Science (High School)"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml" "include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_high_computer_science" "tag": "arabicmmlu_stem_tasks"
"task_alias": "High Computer Science" "task": "arabicmmlu_computer_science_high_school"
"task_alias": "Computer Science (High School)"
"dataset_name": "Computer Science (Middle School)"
"include": "_default_arabicmmlu_template_yaml"
"tag": "arabicmmlu_stem_tasks"
"task": "arabicmmlu_computer_science_middle_school"
"task_alias": "Computer Science (Middle School)"
"dataset_name": "Computer Science (Primary School)"
"include": "_default_arabicmmlu_template_yaml"
"tag": "arabicmmlu_stem_tasks"
"task": "arabicmmlu_computer_science_primary_school"
"task_alias": "Computer Science (Primary School)"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment