Unverified commit 5c006ed4, authored by Minho Ryu, committed by GitHub
Browse files

separate category for `global_mmlu` (#2652)

* separate category

* set version 0.0

* apply precommit
parent 370e2f9e
import yaml
# Language codes to generate a task config for; each code is used both in the
# output file/task name and as the config's `dataset_name`.
languages = [
    "en", "ar", "fr", "es", "hi",
    "de", "id", "it", "ja", "ko",
    "pt", "zh", "yo", "bn", "sw",
]
def main() -> None:
    """Write one ``global_mmlu_<language>.yaml`` task config per language.

    For every code in ``languages`` a file is created (or overwritten) in the
    current working directory. Each file starts with a "generated" marker
    comment followed by a YAML mapping with the ``include``, ``task`` and
    ``dataset_name`` keys.
    """
    for language in languages:
        file_name = f"global_mmlu_{language}.yaml"
        # Mode "w" truncates an existing file instead of raising, so the
        # former `except FileExistsError: pass` could never trigger and has
        # been dropped; real I/O errors now surface instead of looking handled.
        with open(file_name, "w", encoding="utf-8") as f:
            f.write("# Generated by _generate_configs.py\n")
            yaml.dump(
                {
                    "include": "_default_yaml",
                    "task": f"global_mmlu_{language}",
                    "dataset_name": language,
                },
                f,
            )


if __name__ == "__main__":
    main()
tag:
- global_mmlu
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: ar
test_split: test
fewshot_split: dev
fewshot_config:
......
# Group config: bundles the six per-category Arabic tasks under one group name.
group: global_mmlu_ar
task:
  - global_mmlu_ar_business
  - global_mmlu_ar_humanities
  - global_mmlu_ar_medical
  - global_mmlu_ar_other
  - global_mmlu_ar_stem
  - global_mmlu_ar_social_sciences
# `weight_by_size` belongs to the `acc` entry, not to the top level.
aggregate_metric_list:
  - metric: acc
    weight_by_size: True
metadata:
  version: 0.0
# Six per-category Arabic task configs shown back to back. Each one includes
# `_ar_template_yaml` and differs only in its `process_docs` hook and `task` name.
# NOTE(review): the `_generate_configs.py` shown with these files writes
# `include: _default_yaml` plus `dataset_name` and no `process_docs`, so it does
# not appear to be the version that produced them - confirm which is current.
# --- Business ---
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_ar_business
# --- Humanities ---
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_ar_humanities
# --- Medical ---
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_ar_medical
# --- Other ---
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_other
task: global_mmlu_ar_other
# --- Social Sciences ---
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_social_sciences
task: global_mmlu_ar_social_sciences
# --- STEM ---
# Generated by _generate_configs.py
include: _ar_template_yaml
process_docs: !function utils.process_stem
task: global_mmlu_ar_stem
from functools import partial
# Category labels matched against each document's "subject_category" field.
CATEGORIES = [
    "Business",
    "Humanities",
    "Medical",
    "Other",
    "STEM",
    "Social Sciences",
]
def process_docs(dataset, category: str):
    """Return only the documents of *dataset* that belong to *category*.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate receives
            one document and indexes it with ``"subject_category"``.
        category: Label a document's ``"subject_category"`` must equal exactly.

    Returns:
        Whatever ``dataset.filter`` returns for that predicate.
    """
    return dataset.filter(lambda doc: doc["subject_category"] == category)
# One category filter per entry of CATEGORIES, keyed "process_<category>" with
# the category lower-cased and spaces replaced by underscores.
process_functions = dict(
    (
        "process_" + label.lower().replace(" ", "_"),
        partial(process_docs, category=label),
    )
    for label in CATEGORIES
)
# Publish every filter as a module-level name so it can be looked up by attribute.
globals().update(process_functions)
# Shared settings for the `bn` Global-MMLU-Lite tasks
# (presumably the file the per-category configs pull in via `include: _bn_template_yaml`).
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: bn
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: default
output_type: multiple_choice
# Prompt: the question followed by the four lettered options and an "Answer:" cue.
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
# `aggregation` and `higher_is_better` belong to the `acc` entry.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
# Group config: bundles the six per-category `bn` tasks under one group name.
group: global_mmlu_bn
task:
  - global_mmlu_bn_business
  - global_mmlu_bn_humanities
  - global_mmlu_bn_medical
  - global_mmlu_bn_other
  - global_mmlu_bn_stem
  - global_mmlu_bn_social_sciences
# `weight_by_size` belongs to the `acc` entry, not to the top level.
aggregate_metric_list:
  - metric: acc
    weight_by_size: True
metadata:
  version: 0.0
# Six per-category `bn` task configs shown back to back. Each one includes
# `_bn_template_yaml` and differs only in its `process_docs` hook and `task` name.
# NOTE(review): the `_generate_configs.py` shown with these files writes
# `include: _default_yaml` plus `dataset_name` and no `process_docs`, so it does
# not appear to be the version that produced them - confirm which is current.
# --- Business ---
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_business
task: global_mmlu_bn_business
# --- Humanities ---
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_humanities
task: global_mmlu_bn_humanities
# --- Medical ---
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_medical
task: global_mmlu_bn_medical
# --- Other ---
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_other
task: global_mmlu_bn_other
# --- Social Sciences ---
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_social_sciences
task: global_mmlu_bn_social_sciences
# --- STEM ---
# Generated by _generate_configs.py
include: _bn_template_yaml
process_docs: !function utils.process_stem
task: global_mmlu_bn_stem
from functools import partial
# Category labels matched against each document's "subject_category" field.
CATEGORIES = [
    "Business",
    "Humanities",
    "Medical",
    "Other",
    "STEM",
    "Social Sciences",
]
def process_docs(dataset, category: str):
    """Return only the documents of *dataset* that belong to *category*.

    Args:
        dataset: Object exposing ``filter(predicate)``; the predicate receives
            one document and indexes it with ``"subject_category"``.
        category: Label a document's ``"subject_category"`` must equal exactly.

    Returns:
        Whatever ``dataset.filter`` returns for that predicate.
    """
    return dataset.filter(lambda doc: doc["subject_category"] == category)
# One category filter per entry of CATEGORIES, keyed "process_<category>" with
# the category lower-cased and spaces replaced by underscores.
process_functions = dict(
    (
        "process_" + label.lower().replace(" ", "_"),
        partial(process_docs, category=label),
    )
    for label in CATEGORIES
)
# Publish every filter as a module-level name so it can be looked up by attribute.
globals().update(process_functions)
# Shared settings for the `de` Global-MMLU-Lite tasks
# (presumably a per-language template like `_bn_template_yaml` - confirm the file name).
dataset_path: CohereForAI/Global-MMLU-Lite
dataset_name: de
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: default
output_type: multiple_choice
# Prompt: the question followed by the four lettered options and an "Answer:" cue.
doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
# `aggregation` and `higher_is_better` belong to the `acc` entry.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment