Commit c1e63555 authored by Yu Shi Jie's avatar Yu Shi Jie
Browse files

Merge branch 'upstream' into 'mmlu-pro'

add tokenizer logs info (#1731)

See merge request shijie.yu/lm-evaluation-harness!4
parents e361687c 42dc2448
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_ar
dataset_path: hails/agieval-lsat-ar
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_lr
dataset_path: hails/agieval-lsat-lr
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_rc
dataset_path: hails/agieval-lsat-rc
group:
- agieval
- agieval_en
task: agieval_math
dataset_path: hails/agieval-math
dataset_name: null
......
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_en_without_passage
dataset_path: hails/agieval-sat-en-without-passage
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_en
dataset_path: hails/agieval-sat-en
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_math
dataset_path: hails/agieval-sat-math
group:
tag:
- anli
task: anli_r1
dataset_path: anli
......
......@@ -18,7 +18,7 @@ Homepage: https://github.com/mbzuai-nlp/ArabicMMLU
```
@misc{koto2024arabicmmlu,
title={ArabicMMLU: Assessing Massive Multitask Language Understanding in Arabic},
title={ArabicMMLU: Assessing Massive Multitask Language Understanding in Arabic},
author={Fajri Koto and Haonan Li and Sara Shatnawi and Jad Doughman and Abdelrahman Boda Sadallah and Aisha Alraeesi and Khalid Almubarak and Zaid Alyafeai and Neha Sengupta and Shady Shehata and Nizar Habash and Preslav Nakov and Timothy Baldwin},
year={2024},
eprint={2402.12840},
......@@ -37,4 +37,4 @@ Homepage: https://github.com/mbzuai-nlp/ArabicMMLU
* `arabicmmlu_stem_social_science`: evaluates social science ArabicMMLU tasks.
* `arabicmmlu_stem_humanities`: evaluates humanities ArabicMMLU tasks.
* `arabicmmlu_stem_language`: evaluates Arabic language ArabicMMLU tasks.
* `arabicmmlu_stem_other`: evaluates other ArabicMMLU tasks.
\ No newline at end of file
* `arabicmmlu_stem_other`: evaluates other ArabicMMLU tasks.
......@@ -5,3 +5,8 @@ task:
- arabicmmlu_humanities
- arabicmmlu_stem
- arabicmmlu_language
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_humanities
group_alias: Humanities
task:
- arabicmmlu_humanities_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_language
group_alias: Language
task:
- arabicmmlu_language_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_other
group_alias: Other
task:
- arabicmmlu_other_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_social_science
group_alias: Social Science
task:
- arabicmmlu_social_science_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_stem
group_alias: STEM
task:
- arabicmmlu_stem_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
......@@ -11,3 +11,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
......@@ -58,7 +59,7 @@ SUBJECTS = {
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--base_yaml_path", default="_default_template_yaml")
parser.add_argument("--base_yaml_path", default="_default_arabicmmlu_template_yaml")
parser.add_argument("--save_prefix_path", default="arabicmmlu")
return parser.parse_args()
......@@ -76,20 +77,21 @@ if __name__ == "__main__":
if category not in ALL_CATEGORIES:
ALL_CATEGORIES.append(category)
# description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"
yaml_dict = {
"include": base_yaml_name,
"group": f"arabicmmlu_{category}",
"group_alias": category.replace("_", " "),
"tag": f"arabicmmlu_{category}",
"task": f"arabicmmlu_{subject.lower().replace(' ', '_')}",
"task_alias": subject,
"dataset_name": subject,
# "description": description,
}
file_save_path = args.save_prefix_path + f"_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}.yaml"
file_save_path = (
args.save_prefix_path
+ f"_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}.yaml"
)
eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
......
"dataset_name": "Arabic Language (General)"
"group": "arabicmmlu_language"
"group_alias": "language"
"include": "_default_template_yaml"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(general)"
"task_alias": "Arabic Language (General)"
"dataset_name": "Arabic Language (Grammar)"
"group": "arabicmmlu_language"
"group_alias": "language"
"include": "_default_template_yaml"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(grammar)"
"task_alias": "Arabic Language (Grammar)"
"dataset_name": "Driving Test"
"group": "arabicmmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_driving_test"
"task_alias": "Driving Test"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment