Commit 269b66e9 authored by haileyschoelkopf
Browse files

fix arabicmmlu

parent 73767dff
...@@ -5,3 +5,8 @@ task: ...@@ -5,3 +5,8 @@ task:
- arabicmmlu_humanities - arabicmmlu_humanities
- arabicmmlu_stem - arabicmmlu_stem
- arabicmmlu_language - arabicmmlu_language
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_humanities
group_alias: humanities
task:
- arabicmmlu_humanities_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_language
group_alias: language
task:
- arabicmmlu_language_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_other
group_alias: other
task:
- arabicmmlu_other_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_social_science
group_alias: social_science
task:
- arabicmmlu_social_science_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_stem
group_alias: stem
task:
- arabicmmlu_stem_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
...@@ -11,3 +11,5 @@ metric_list: ...@@ -11,3 +11,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
version: 0.0
...@@ -81,8 +81,7 @@ if __name__ == "__main__": ...@@ -81,8 +81,7 @@ if __name__ == "__main__":
yaml_dict = { yaml_dict = {
"include": base_yaml_name, "include": base_yaml_name,
"group": f"arabicmmlu_{category}", "tag": f"arabicmmlu_{category}",
"group_alias": category.replace("_", " "),
"task": f"arabicmmlu_{subject.lower().replace(' ', '_')}", "task": f"arabicmmlu_{subject.lower().replace(' ', '_')}",
"task_alias": subject, "task_alias": subject,
"dataset_name": subject, "dataset_name": subject,
......
"dataset_name": "Arabic Language (General)" "dataset_name": "Arabic Language (General)"
"group": "arabicmmlu_language" "tag": "arabicmmlu_language_tasks"
"group_alias": "language"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_arabic_language_(general)" "task": "arabicmmlu_arabic_language_(general)"
"task_alias": "Arabic Language (General)" "task_alias": "Arabic Language (General)"
"dataset_name": "Arabic Language (Grammar)" "dataset_name": "Arabic Language (Grammar)"
"group": "arabicmmlu_language" "tag": "arabicmmlu_language_tasks"
"group_alias": "language"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_arabic_language_(grammar)" "task": "arabicmmlu_arabic_language_(grammar)"
"task_alias": "Arabic Language (Grammar)" "task_alias": "Arabic Language (Grammar)"
"dataset_name": "Driving Test" "dataset_name": "Driving Test"
"group": "arabicmmlu_other" "tag": "arabicmmlu_other_tasks"
"group_alias": "other"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_driving_test" "task": "arabicmmlu_driving_test"
"task_alias": "Driving Test" "task_alias": "Driving Test"
"dataset_name": "General Knowledge" "dataset_name": "General Knowledge"
"group": "arabicmmlu_other" "tag": "arabicmmlu_other_tasks"
"group_alias": "other"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_general_knowledge" "task": "arabicmmlu_general_knowledge"
"task_alias": "General Knowledge" "task_alias": "General Knowledge"
"dataset_name": "High Arabic Language" "dataset_name": "High Arabic Language"
"group": "arabicmmlu_language" "tag": "arabicmmlu_language_tasks"
"group_alias": "language"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_arabic_language" "task": "arabicmmlu_high_arabic_language"
"task_alias": "High Arabic Language" "task_alias": "High Arabic Language"
"dataset_name": "High Biology" "dataset_name": "High Biology"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_biology" "task": "arabicmmlu_high_biology"
"task_alias": "High Biology" "task_alias": "High Biology"
"dataset_name": "High Civics" "dataset_name": "High Civics"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_civics" "task": "arabicmmlu_high_civics"
"task_alias": "High Civics" "task_alias": "High Civics"
"dataset_name": "High Computer Science" "dataset_name": "High Computer Science"
"group": "arabicmmlu_stem" "tag": "arabicmmlu_stem_tasks"
"group_alias": "stem"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_computer_science" "task": "arabicmmlu_high_computer_science"
"task_alias": "High Computer Science" "task_alias": "High Computer Science"
"dataset_name": "High Economics" "dataset_name": "High Economics"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_economics" "task": "arabicmmlu_high_economics"
"task_alias": "High Economics" "task_alias": "High Economics"
"dataset_name": "High Geography" "dataset_name": "High Geography"
"group": "arabicmmlu_social_science" "tag": "arabicmmlu_social_science_tasks"
"group_alias": "social science"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_geography" "task": "arabicmmlu_high_geography"
"task_alias": "High Geography" "task_alias": "High Geography"
"dataset_name": "High History" "dataset_name": "High History"
"group": "arabicmmlu_humanities" "tag": "arabicmmlu_humanities_tasks"
"group_alias": "humanities"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_history" "task": "arabicmmlu_high_history"
"task_alias": "High History" "task_alias": "High History"
"dataset_name": "High Islamic Studies" "dataset_name": "High Islamic Studies"
"group": "arabicmmlu_humanities" "tag": "arabicmmlu_humanities_tasks"
"group_alias": "humanities"
"include": "_default_template_yaml" "include": "_default_template_yaml"
"task": "arabicmmlu_high_islamic_studies" "task": "arabicmmlu_high_islamic_studies"
"task_alias": "High Islamic Studies" "task_alias": "High Islamic Studies"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment