Commit c1e63555 authored by Yu Shi Jie
Browse files

Merge branch 'upstream' into 'mmlu-pro'

add tokenizer logs info (#1731)

See merge request shijie.yu/lm-evaluation-harness!4
parents e361687c 42dc2448
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_ar
dataset_path: hails/agieval-lsat-ar
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_lr
dataset_path: hails/agieval-lsat-lr
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_lsat_rc
dataset_path: hails/agieval-lsat-rc
group:
- agieval
- agieval_en
task: agieval_math
dataset_path: hails/agieval-math
dataset_name: null
......
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_en_without_passage
dataset_path: hails/agieval-sat-en-without-passage
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_en
dataset_path: hails/agieval-sat-en
include: aqua-rat.yaml
group:
- agieval
- agieval_nous
- agieval_en
task: agieval_sat_math
dataset_path: hails/agieval-sat-math
group:
tag:
- anli
task: anli_r1
dataset_path: anli
......
......@@ -5,3 +5,8 @@ task:
- arabicmmlu_humanities
- arabicmmlu_stem
- arabicmmlu_language
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_humanities
group_alias: Humanities
task:
- arabicmmlu_humanities_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_language
group_alias: Language
task:
- arabicmmlu_language_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_other
group_alias: Other
task:
- arabicmmlu_other_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_social_science
group_alias: Social Science
task:
- arabicmmlu_social_science_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
group: arabicmmlu_stem
group_alias: STEM
task:
- arabicmmlu_stem_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
metadata:
version: 0
......@@ -11,3 +11,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
......@@ -58,7 +59,7 @@ SUBJECTS = {
def parse_args():
    """Parse the command-line options of this YAML-generation script.

    Options:
        --base_yaml_path: name/path of the base template YAML
            (default ``_default_arabicmmlu_template_yaml``).
            NOTE(review): presumably the template that every generated
            per-subject YAML includes -- confirm against the caller.
        --save_prefix_path: prefix prepended to each generated YAML file
            name (default ``arabicmmlu``).

    Returns:
        argparse.Namespace: parsed options with the attributes
        ``base_yaml_path`` and ``save_prefix_path``.
    """
    parser = argparse.ArgumentParser()
    # Each flag must be registered exactly once: adding "--base_yaml_path" a
    # second time makes argparse raise a conflicting-option error before any
    # argument is parsed.
    parser.add_argument(
        "--base_yaml_path", default="_default_arabicmmlu_template_yaml"
    )
    parser.add_argument("--save_prefix_path", default="arabicmmlu")
    return parser.parse_args()
......@@ -76,20 +77,21 @@ if __name__ == "__main__":
if category not in ALL_CATEGORIES:
ALL_CATEGORIES.append(category)
# description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))}.\n\n"
yaml_dict = {
"include": base_yaml_name,
"group": f"arabicmmlu_{category}",
"group_alias": category.replace("_", " "),
"tag": f"arabicmmlu_{category}",
"task": f"arabicmmlu_{subject.lower().replace(' ', '_')}",
"task_alias": subject,
"dataset_name": subject,
# "description": description,
}
file_save_path = args.save_prefix_path + f"_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}.yaml"
file_save_path = (
args.save_prefix_path
+ f"_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}.yaml"
)
eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
......
"dataset_name": "Arabic Language (General)"
"group": "arabicmmlu_language"
"group_alias": "language"
"include": "_default_template_yaml"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(general)"
"task_alias": "Arabic Language (General)"
"dataset_name": "Arabic Language (Grammar)"
"group": "arabicmmlu_language"
"group_alias": "language"
"include": "_default_template_yaml"
"tag": "arabicmmlu_language_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_arabic_language_(grammar)"
"task_alias": "Arabic Language (Grammar)"
"dataset_name": "Driving Test"
"group": "arabicmmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_driving_test"
"task_alias": "Driving Test"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment